From f1819fb0f4ce3620cbac7b399e925ed8abe66b09 Mon Sep 17 00:00:00 2001 From: Colin Patrick Mccabe Date: Sat, 26 Oct 2013 21:06:25 -0700 Subject: [PATCH] some fixups to snarf_mail scripts * rename snarf_mail.rb to snarf_mail_imap.rb * rename snarf_gmail.rb to snarf_mail_pop.rb imap script: make --username argument more consistent with other arguments. Fix syntax error with opts.delete. pop script: we don't actually need the 'password' gem for this, so don't include. Signed-off-by: Colin McCabe --- snarf_gmail.rb | 86 ----------------------- snarf_mail.rb | 192 ---------------------------------------------------- snarf_mail_imap.rb | 192 ++++++++++++++++++++++++++++++++++++++++++++++++++++ snarf_mail_pop.rb | 84 +++++++++++++++++++++++ 4 files changed, 276 insertions(+), 278 deletions(-) delete mode 100755 snarf_gmail.rb delete mode 100755 snarf_mail.rb create mode 100755 snarf_mail_imap.rb create mode 100755 snarf_mail_pop.rb diff --git a/snarf_gmail.rb b/snarf_gmail.rb deleted file mode 100755 index 52aed0e..0000000 --- a/snarf_gmail.rb +++ /dev/null @@ -1,86 +0,0 @@ -#!/usr/bin/env ruby - -# -# snarf_gmail.rb -# -# Copies mail from a gmail account -# You need ruby 1.9 for this -# You need the password gem for this -# -# Problem: this appears to only download some mails (usually around 383 or -# so). One workaround is to run this multiple times. Still not sure if there -# is a better workaround. -# -# Colin McCabe -# - -require 'net/pop' -require 'optparse' -require 'ostruct' -require 'password' - -class MyOptions - def self.parse(args) - opts = OpenStruct.new - - # Fill in $opts values - parser = OptionParser.new do |myparser| - myparser.banner = "Usage: #{ File.basename($0) } [opts]" - myparser.separator("Specific options:") - myparser.on("--username USERNAME", "-u", - "Email account to fetch. (example: \ -RareCactus@gmail.com)") do |u| - opts.username = u - end - myparser.on("--dry-run", "-d", - "Dry run. 
State what would be done without actually \ -doing it.") do |a| - opts.dry_run = true - end - end - - parser.parse!(args) - raise "must give a username" unless opts.username - return opts - end -end - -# MAIN -begin - $opts = MyOptions.parse(ARGV) -rescue Exception => msg - $stderr.print("#{msg}.\nType --help to see usage information.\n") - exit 1 -end - -puts "type password for #{$opts.username}" -password = gets -password.chomp! - -Net::POP3.enable_ssl(OpenSSL::SSL::VERIFY_NONE) -Net::POP3.start("pop.gmail.com", 995, $opts.username, password) do |pop| - #pop.reset() - mails = pop.mails - n_mails = pop.n_mails - puts "found #{n_mails} mails." - if ($opts.dry_run) - puts "successfully connected." - exit 0 - end - count = 0 - mails.each do |mail| - fname = mail.unique_id - #fname = sprintf("%08d", count) - File.open(fname, 'w+') do|f| - f.write mail.pop - end - count = count + 1 - if ((count % 100) == 0) - count = 0 - print "." - STDOUT.flush - end - end -end -puts "done." -exit 0 diff --git a/snarf_mail.rb b/snarf_mail.rb deleted file mode 100755 index 76c831c..0000000 --- a/snarf_mail.rb +++ /dev/null @@ -1,192 +0,0 @@ -#!/usr/bin/env ruby - -# -# snarf_mail.rb -# -# Copies mail from an IMAP account -# -# Handy reference: -# http://ruby-doc.org/stdlib/libdoc/net/imap/rdoc/index.html -# - -require 'date' -require 'net/imap' -require 'optparse' -require 'ostruct' - -class MyOptions - def self.parse(args) - opts = OpenStruct.new - opts.mailboxes = Array.new - opts.delete = "none" - - # Fill in $opts values - parser = OptionParser.new do |myparser| - myparser.banner = "Usage: #{ File.basename($0) } [opts]" - myparser.separator("Specific options:") - myparser.on("--delete POLICY", "-d", - "Set delete policy to 'none' or 'old'. Default is 'none'.") do |d| - opts.delete = d - end - myparser.on("--username USERNAME", "-u", - "Email account to fetch. 
(example: \ -RareCactus@gmail.com)") do |u| - opts.username = u - end - myparser.on("--list-folders", "-l", - "List the IMAP folders that are present.") do |a| - raise "can only specify one action" if (opts.action) - opts.action = :list - end - myparser.on("--snarf", "-S", - "Copy mail to the current directory.") do |a| - raise "can only specify one action" if (opts.action) - opts.action = :snarf - end - myparser.on("--box [MAILBOX]", "-b", - "Act on a given mailbox. You may specify -b more than once for \ -multiple mailboxes.") do |a| - opts.mailboxes << a - end - myparser.on("--server [SERVER]", "-s", - "Email server to use") do |u| - opts.server = u - end - end - - parser.parse!(args) - raise "must specify an action" unless opts.action - raise "must give a username" unless opts.username - raise "must give a server" unless opts.server - return opts - end -end - -# Get a password from STDIN without echoing it. -# This is kind of ugly, but it does work. -def get_password(prompt) - shell_cmds = 'stty -echo && read password && echo ${password}' - printf "#{prompt}" - STDOUT.flush - pass = "" - pipe = IO.popen(shell_cmds, "r") do |pipe| - pass = pipe.read - end - echo_status = $?.exitstatus - system("stty sane") - puts - if (echo_status != 0) then - raise "get_password: error executing: #{shell_cmds}" - end - return pass.chomp -end - -def format_uid(uid) - # We don't know how to deal with non-numeric UIDs. Best just to leave them - # alone. 
- return uid if (uid =~ /[^0123456789]/) - - # Pad numeric uids out to 6 digits - return sprintf("%006d", uid) -end - -def format_date(date) - date.gsub!(' ', '_') -end - -def get_sanitized_email_name(mailbox, arr) - msn = mailbox.dup - msn.gsub!(' ', '_') - msn.gsub!('/', '.') - return "#{msn}_#{format_date(arr["INTERNALDATE"])}_#{format_uid(arr["UID"])}" -end - -def write_email_to_disk(mailbox, data) - arr = data[0].attr - filename = get_sanitized_email_name(mailbox, arr) - fp = File.open(filename, 'w') - fp.write(arr["RFC822.HEADER"]) - fp.write(arr["RFC822.TEXT"]) - fp.close -end - -def snarf_mailbox(imap, mailbox) - full_count = 0 - first_time = true - - searchterms = [ "NOT", "DELETED" ] - if $opts.delete == "old" - t = Date.today() - 365 - time_str = t.strftime("%e-%b-%Y") - searchterms << "BEFORE" << time_str - prequel = "fetched and deleted: " - elsif $opts.delete == "none" - prequel = "fetched: " - else - raise "expected one of 'old', 'none' for delete argument." - end - - while true - count = 0 - msg_seqnos = Array.new - - imap.select(mailbox) - imap.search(searchterms).each do |message_id| - if (first_time == true) then - # Print a dot immediately after making first contact with the server. - # It is reassuring to the user. 
- printf(".") - STDOUT.flush() - first_time = false - end - data = imap.fetch(message_id, - [ "INTERNALDATE", "UID", "RFC822.HEADER", "RFC822.TEXT" ]) - write_email_to_disk(mailbox, data) - count = count + 1 - full_count = full_count + 1 - msg_seqnos << data[0].seqno.to_i - #break if (count > 20) - end - if (count == 0) then - puts "#{prequel} #{full_count} messages from #{mailbox}" - return - end - - # Print out a dot to signify progress - printf(".") - STDOUT.flush() - - if ($opts.delete != "none"): - # Delete messages - imap.store(msg_seqnos, "+FLAGS", [:Deleted]) - imap.expunge - end - end -end - -# MAIN -begin - $opts = MyOptions.parse(ARGV) -rescue Exception => msg - $stderr.print("#{msg}.\nType --help to see usage information.\n") - exit 1 -end - -password = get_password("Please enter the password for #{$opts.username}:") -imap = Net::IMAP.new($opts.server, 993, true) -imap.login($opts.username, password) -case ($opts.action) -when :list - imap.list("", "*").each do |mbl| - puts "#{mbl.name}" - end -when :snarf - $opts.mailboxes.each do |mailbox| - snarf_mailbox(imap, mailbox) - end -else - raise "unknown action #{$opts.action}" -end -imap.logout() -imap.disconnect() -exit 0 diff --git a/snarf_mail_imap.rb b/snarf_mail_imap.rb new file mode 100755 index 0000000..d4d6e93 --- /dev/null +++ b/snarf_mail_imap.rb @@ -0,0 +1,192 @@ +#!/usr/bin/env ruby + +# +# snarf_mail_imap.rb +# +# Copies mail from an IMAP account +# +# Handy reference: +# http://ruby-doc.org/stdlib/libdoc/net/imap/rdoc/index.html +# + +require 'date' +require 'net/imap' +require 'optparse' +require 'ostruct' + +class MyOptions + def self.parse(args) + opts = OpenStruct.new + opts.mailboxes = Array.new + opts.delete = "none" + + # Fill in $opts values + parser = OptionParser.new do |myparser| + myparser.banner = "Usage: #{ File.basename($0) } [opts]" + myparser.separator("Specific options:") + myparser.on("--delete POLICY", "-d", + "Set delete policy to 'none' or 'old'. 
Default is 'none'.") do |d| + opts.delete = d + end + myparser.on("--username [USERNAME]", "-u", + "Email account to fetch. (example: \ +RareCactus@gmail.com or cmccabe@company.com)") do |u| + opts.username = u + end + myparser.on("--list-folders", "-l", + "List the IMAP folders that are present.") do |a| + raise "can only specify one action" if (opts.action) + opts.action = :list + end + myparser.on("--snarf", "-S", + "Copy mail to the current directory.") do |a| + raise "can only specify one action" if (opts.action) + opts.action = :snarf + end + myparser.on("--box [MAILBOX]", "-b", + "Act on a given mailbox. You may specify -b more than once for \ +multiple mailboxes.") do |a| + opts.mailboxes << a + end + myparser.on("--server [SERVER]", "-s", + "Email server to use. Example: imap.gmail.com") do |u| + opts.server = u + end + end + + parser.parse!(args) + raise "must specify an action" unless opts.action + raise "must give a username" unless opts.username + raise "must give a server" unless opts.server + return opts + end +end + +# Get a password from STDIN without echoing it. +# This is kind of ugly, but it does work. +def get_password(prompt) + shell_cmds = 'stty -echo && read password && echo ${password}' + printf "#{prompt}" + STDOUT.flush + pass = "" + pipe = IO.popen(shell_cmds, "r") do |pipe| + pass = pipe.read + end + echo_status = $?.exitstatus + system("stty sane") + puts + if (echo_status != 0) then + raise "get_password: error executing: #{shell_cmds}" + end + return pass.chomp +end + +def format_uid(uid) + # We don't know how to deal with non-numeric UIDs. Best just to leave them + # alone. 
+ return uid if (uid =~ /[^0123456789]/) + + # Pad numeric uids out to 6 digits + return sprintf("%006d", uid) +end + +def format_date(date) + date.gsub!(' ', '_') +end + +def get_sanitized_email_name(mailbox, arr) + msn = mailbox.dup + msn.gsub!(' ', '_') + msn.gsub!('/', '.') + return "#{msn}_#{format_date(arr["INTERNALDATE"])}_#{format_uid(arr["UID"])}" +end + +def write_email_to_disk(mailbox, data) + arr = data[0].attr + filename = get_sanitized_email_name(mailbox, arr) + fp = File.open(filename, 'w') + fp.write(arr["RFC822.HEADER"]) + fp.write(arr["RFC822.TEXT"]) + fp.close +end + +def snarf_mailbox(imap, mailbox) + full_count = 0 + first_time = true + + searchterms = [ "NOT", "DELETED" ] + if $opts.delete == "old" + t = Date.today() - 365 + time_str = t.strftime("%e-%b-%Y") + searchterms << "BEFORE" << time_str + prequel = "fetched and deleted: " + elsif $opts.delete == "none" + prequel = "fetched: " + else + raise "expected one of 'old', 'none' for delete argument." + end + + while true + count = 0 + msg_seqnos = Array.new + + imap.select(mailbox) + imap.search(searchterms).each do |message_id| + if (first_time == true) then + # Print a dot immediately after making first contact with the server. + # It is reassuring to the user. 
+ printf(".") + STDOUT.flush() + first_time = false + end + data = imap.fetch(message_id, + [ "INTERNALDATE", "UID", "RFC822.HEADER", "RFC822.TEXT" ]) + write_email_to_disk(mailbox, data) + count = count + 1 + full_count = full_count + 1 + msg_seqnos << data[0].seqno.to_i + #break if (count > 20) + end + if (count == 0) then + puts "#{prequel} #{full_count} messages from #{mailbox}" + return + end + + # Print out a dot to signify progress + printf(".") + STDOUT.flush() + + if $opts.delete != "none" then + # Delete messages + imap.store(msg_seqnos, "+FLAGS", [:Deleted]) + imap.expunge + end + end +end + +# MAIN +begin + $opts = MyOptions.parse(ARGV) +rescue Exception => msg + $stderr.print("#{msg}.\nType --help to see usage information.\n") + exit 1 +end + +password = get_password("Please enter the password for #{$opts.username}:") +imap = Net::IMAP.new($opts.server, 993, true) +imap.login($opts.username, password) +case ($opts.action) +when :list + imap.list("", "*").each do |mbl| + puts "#{mbl.name}" + end +when :snarf + $opts.mailboxes.each do |mailbox| + snarf_mailbox(imap, mailbox) + end +else + raise "unknown action #{$opts.action}" +end +imap.logout() +imap.disconnect() +exit 0 diff --git a/snarf_mail_pop.rb b/snarf_mail_pop.rb new file mode 100755 index 0000000..3f6f115 --- /dev/null +++ b/snarf_mail_pop.rb @@ -0,0 +1,84 @@ +#!/usr/bin/env ruby + +# +# snarf_mail_pop.rb +# +# Copies mail from a gmail account +# You need ruby 1.9 for this +# +# Problem: this appears to only download some mails (usually around 383 or +# so). One workaround is to run this multiple times. Still not sure if there +# is a better workaround. 
+# +# Colin McCabe +# + +require 'net/pop' +require 'optparse' +require 'ostruct' + +class MyOptions + def self.parse(args) + opts = OpenStruct.new + + # Fill in $opts values + parser = OptionParser.new do |myparser| + myparser.banner = "Usage: #{ File.basename($0) } [opts]" + myparser.separator("Specific options:") + myparser.on("--username USERNAME", "-u", + "Email account to fetch. (example: \ +RareCactus@gmail.com)") do |u| + opts.username = u + end + myparser.on("--dry-run", "-d", + "Dry run. State what would be done without actually \ +doing it.") do |a| + opts.dry_run = true + end + end + + parser.parse!(args) + raise "must give a username" unless opts.username + return opts + end +end + +# MAIN +begin + $opts = MyOptions.parse(ARGV) +rescue Exception => msg + $stderr.print("#{msg}.\nType --help to see usage information.\n") + exit 1 +end + +puts "type password for #{$opts.username}" +password = gets +password.chomp! + +Net::POP3.enable_ssl(OpenSSL::SSL::VERIFY_NONE) +Net::POP3.start("pop.gmail.com", 995, $opts.username, password) do |pop| + #pop.reset() + mails = pop.mails + n_mails = pop.n_mails + puts "found #{n_mails} mails." + if ($opts.dry_run) + puts "successfully connected." + exit 0 + end + count = 0 + mails.each do |mail| + fname = mail.unique_id + #fname = sprintf("%08d", count) + File.open(fname, 'w+') do|f| + f.write mail.pop + end + count = count + 1 + if ((count % 100) == 0) + count = 0 + print "." + STDOUT.flush + end + end +end +puts "done." +exit 0 -- 1.6.6.rc1.39.g9a42