#! /usr/bin/env ruby
#
#  pdumpfs - a daily backup system similar to Plan9's dumpfs.
#
#  DESCRIPTION:
#
#    pdumpfs is a simple daily backup system, similar to
#    Plan9's dumpfs, which preserves every daily snapshot.
#    You can access past snapshots at any time to retrieve
#    a file as it was on a certain day.  Let's back up your
#    home directory with pdumpfs!
#
#    pdumpfs constructs the snapshot YYYY/MM/DD in the
#    destination directory. The first time it runs, all source
#    files are copied to the snapshot directory. From the second
#    run onward, pdumpfs copies only updated or newly created
#    files and stores unchanged files as hard links to the files
#    of the most recent earlier snapshot (normally the previous
#    day's) to save disk space.
#
#  USAGE:
#
#    % pdumpfs <source directory> <destination directory>
#             [<destination basename>]
#
#  SAMPLE CRONTAB ENTRY:
#
#    00 05 * * * pdumpfs /home/USER /backup >/dev/null 2>&1
#
#  BUGS:
#
#    pdumpfs can handle only normal files, directories, and
#    symbolic links.
#
#
# Copyright (C) 2001-2004 Satoru Takabayashi <satoru@namazu.org>
#     All rights reserved.
#     This is free software with ABSOLUTELY NO WARRANTY.
#
# You can redistribute it and/or modify it under the terms of
# the GNU General Public License version 2.
#
#
# Win32 ported by Yasuhiro Morioka <yasuhiro.morioka@k5.dion.ne.jp>
# 2003/02/01
#
# --exclude-* support by Katsuwo Komiya	<katsuwo@monochrome.jp>
#

require 'find'
require 'date'
require 'ftools'
require 'getoptlong'

module Pdumpfs
  VERSION = 0.8

  # Decides whether a path must be left out of a snapshot.
  #
  # A path is excluded when any of these holds:
  #   * it is a regular file whose size is at least the configured limit,
  #   * its full path matches one of the configured regular expressions,
  #   * it is a regular file whose basename matches one of the globs.
  class FileMatcher
    # options[:patterns] - Array of Regexp matched against the whole path
    #                      (defaults to no patterns).
    # options[:globs]    - Array of glob strings matched against the
    #                      basename of regular files (defaults to no globs).
    # options[:size]     - size limit such as "100", "10K" or "2m";
    #                      nil disables the size check.
    #
    # Raises RuntimeError when the size limit cannot be parsed.
    def initialize(options = {})
      # Fall back to empty lists so exclude? works without any filters.
      @patterns = options[:patterns] || []
      @globs    = options[:globs]    || []
      @size     = calc_size(options[:size])
    end

    # Converts a size specification into a number of bytes.
    #
    # size - nil, or a string of digits optionally followed by one of the
    #        unit letters K, M, G, T, P (upper or lower case), each a
    #        further power of 1024.
    #
    # Returns -1 for nil (meaning "no limit"), otherwise the byte count.
    # Raises RuntimeError ("Invalid size: ...") for anything else.
    def calc_size(size)
      table = { "K" => 1, "M" => 2, "G" => 3, "T" => 4, "P" => 5 }
      units = table.keys.join('')
      case size
      when nil
        -1
      when /\A(\d+)([#{units}]?)\z/i
        num  = Regexp.last_match[1].to_i
        # The regexp is case-insensitive, so normalize the unit before the
        # lookup; otherwise "10k" would silently mean 10 bytes.
        unit = Regexp.last_match[2].upcase
        num * 1024 ** (table[unit] || 0)
      else
        raise "Invalid size: #{size}"
      end
    end

    # Returns true when +path+ should be skipped, false otherwise.
    #
    # The path is examined with lstat, so a symbolic link is judged as a
    # link and not as its target.  When the path does not exist, the error
    # message is written to STDERR and false is returned.
    def exclude?(path)
      stat = File.lstat(path)

      return true if @size >= 0 && stat.file? && stat.size >= @size
      return true if @patterns.any? { |pattern| pattern.match(path) }
      if stat.file?
        name = File.basename(path)
        return true if @globs.any? { |glob| File.fnmatch(glob, name) }
      end
      false
    rescue Errno::ENOENT => error
      STDERR.puts error.message
      false
    end
  end
end

# Tells whether the interpreter reports a Windows-family platform.
# Returns the MatchData for RUBY_PLATFORM when it names one of the
# recognized builds (mswin32, cygwin, mingw, bccwin), otherwise nil.
def windows?
  RUBY_PLATFORM.match(/mswin32|cygwin|mingw|bccwin/)
end

if windows?
  require 'Win32API'
  require "win32ole"

  # Only for interpreters older than 1.7.0: define File.link on top of the
  # Win32 CreateHardLinkA call (presumably because those versions have no
  # usable File.link on Windows -- confirm before relying on this).
  #
  # l - path of the existing file
  # t - path of the hard link to create
  #
  # Raises Errno::EACCES when CreateHardLinkA reports failure (returns 0).
  if RUBY_VERSION < "1.7.0"
    def File.link(l, t)
      create_hard_link = Win32API.new("kernel32", "CreateHardLinkA",
                                      %w(p p l), 'i')
      # CreateHardLinkA takes the new link name first, then the existing file.
      succeeded = create_hard_link.call(t, l, 0) != 0

      raise Errno::EACCES  unless succeeded
    end
  end

  # Expands a leading "@<SpecialFolder>" (for example "@Desktop" or
  # "@MyDocuments") into the path reported by WScript.Shell, and converts
  # every backslash in the result to a forward slash.
  #
  # dir - path as given by the user
  #
  # Returns a new string; paths without a recognized "@" prefix only get
  # their backslashes converted.
  def expand_special_folders(dir)
    specials = %w[(?:AllUsers)?(?:Desktop|Programs|Start(?:Menu|up)) Favorites
                  Fonts MyDocuments NetHood PrintHood Recent SendTo Templates]

    pattern = Regexp.compile(sprintf('^@(%s)', specials.join('|')))

    expanded = dir.sub(pattern) do
      # SpecialFolders is keyed by the bare folder name, so hand it the
      # captured name and not the whole match, which still carries the "@".
      folder = Regexp.last_match[1]
      WIN32OLE.new("WScript.Shell").SpecialFolders(folder)
    end
    expanded.tr('\\', '/')
  end

  # Asks Win32 GetVolumeInformation for the file-system name of the volume
  # that holds +path+.
  #
  # Returns nil when +path+ does not exist; otherwise the contents of the
  # file-system-name buffer with its first run of NUL bytes removed.
  def get_filesystem_type(path)
    return nil unless FileTest.exist?(path)

    volume_info = Win32API.new("kernel32", "GetVolumeInformation",
                               %w(P P L P P P P L), "I")

    # The first three characters of the absolute path are used as the
    # volume root (e.g. "C:/").
    root    = File.expand_path(path)[0, 3]
    fs_name = "\0" * 1024
    volume_info.call(root, nil, 0, nil, nil, nil, fs_name, 1024)

    fs_name.sub(/\000+/, '')
  end

  # Converts a Ruby Time into a packed Win32 FILETIME via
  # SystemTimeToFileTime.
  #
  # time - the Time to convert; its fields are used as they are, so the
  #        caller decides the time zone (File.utime passes UTC values).
  #
  # Returns the 8-byte FILETIME buffer as a String.
  def get_file_time(time)
    systemtime_to_filetime = Win32API.new("kernel32", "SystemTimeToFileTime",
                                          %w(P P), 'I')

    # SYSTEMTIME is eight 16-bit words: year, month, day of week, day,
    # hour, minute, second, milliseconds.  Every field comes from +time+,
    # so no milliseconds leak in from the current clock.
    systemtime = [time.year, time.month, time.wday, time.day,
                  time.hour, time.min, time.sec, time.usec / 1000].pack("S8")
    filetime = "\0" * 8   # FILETIME is two 32-bit words

    systemtime_to_filetime.call(systemtime, filetime)

    filetime
  end

  # Win32 flag values passed to CreateFileA by the File.utime override.
  GENERIC_WRITE              = 0x40000000
  OPEN_EXISTING              = 3
  FILE_FLAG_BACKUP_SEMANTICS = 0x02000000

  # Keep the built-in File.utime reachable under the name File.utime_orig
  # before it is redefined.
  class << File
    alias utime_orig utime
  end

  # Windows replacement for File.utime that can also stamp directories.
  #
  # a   - access time (Time)
  # m   - modification time (Time)
  # dir - path whose timestamps are set
  #
  # Anything that is not a directory is handed to the built-in
  # implementation (File.utime_orig) and nothing else is done.  A
  # directory is opened with CreateFileA using FILE_FLAG_BACKUP_SEMANTICS
  # and stamped with SetFileTime; 0 is passed for the creation-time
  # argument.
  #
  # Always returns 0.
  def File.utime(a, m, dir)
    unless File.directory?(dir)
      File.utime_orig(a, m, dir)
      # Stop here: the Win32 path below is meant for directories only.
      return 0
    end

    create_file   = Win32API.new("kernel32", "CreateFileA",
                                 %w(P L L L L L L), "L")
    set_file_time = Win32API.new("kernel32", "SetFileTime", %w(L P P P), "I")
    close_handle  = Win32API.new("kernel32", "CloseHandle", %w(L), "I")

    atime = get_file_time(a.dup.utc)
    mtime = get_file_time(m.dup.utc)

    dir_handle = create_file.Call(dir.dup, GENERIC_WRITE, 0, 0, OPEN_EXISTING,
                                  FILE_FLAG_BACKUP_SEMANTICS, 0)
    set_file_time.call(dir_handle, 0, atime, mtime)
    close_handle.Call(dir_handle)

    return 0
  end
end

# Prints the command synopsis and the option summary to standard output,
# then terminates the process with exit status 0.
def usage
  synopsis = "Usage: pdumpfs <source directory> <destination directory>" +
             " [destination basename]"
  option_help = [
    "  -e, --exclude=PATTERN          exclude files/directories matching PATTERN",
    "  -s, --exclude-by-size=SIZE     exclude files larger than SIZE",
    "  -w, --exclude-by-glob=GLOB     exclude files matching GLOB",
    "  -v, --version                  print version information and exit",
    "  -h, --help                     show this help message"
  ]

  puts synopsis
  option_help.each { |line| puts line }
  exit 0
end

# Prints the program name and version number to standard output, then
# terminates the process with exit status 0.
def version
  # A version banner, not a usage line: no "Usage:" prefix.
  puts "pdumpfs #{Pdumpfs::VERSION}"
  exit 0
end

# Reports on standard output that +dir+ is not a directory and terminates
# the process with exit status 1.
def nodir(dir)
  message = "No directory: " + dir
  puts(message)
  exit(1)
end

# Cheap "unchanged" test for two paths: true only when neither is a
# symbolic link, both are regular files, and their sizes and modification
# times are equal.  File contents are not compared.
def same_file?(f1, f2)
  return false if File.symlink?(f1) || File.symlink?(f2)
  return false unless File.file?(f1) && File.file?(f2)

  File.size(f1) == File.size(f2) && File.mtime(f1) == File.mtime(f2)
end

# Parses the command line held in ARGV.
#
# Recognized options:
#   -e, --exclude=PATTERN       regular expression matched against paths
#   -s, --exclude-by-size=SIZE  size limit handed to the file matcher
#   -w, --exclude-by-glob=GLOB  glob handed to the file matcher
#   -h, --help                  print usage and exit
#   -v, --version               print version and exit
#
# Prints usage and exits when fewer than two positional arguments remain,
# and reports an error and exits (via nodir) when the source or the
# destination is not a directory.
#
# Returns [src, dest, base, matcher]; base is nil when no destination
# basename was given.
def parse_options
  patterns = []
  globs    = []
  size     = nil

  parser = GetoptLong.new
  # The -s and -w short forms are registered because usage advertises them.
  parser.set_options(['--exclude',         '-e', GetoptLong::REQUIRED_ARGUMENT],
                     ['--exclude-by-size', '-s', GetoptLong::REQUIRED_ARGUMENT],
                     ['--exclude-by-glob', '-w', GetoptLong::REQUIRED_ARGUMENT],
                     ['--help',            '-h', GetoptLong::NO_ARGUMENT],
                     ['--version',         '-v', GetoptLong::NO_ARGUMENT])
  parser.each_option do |name, arg|
    case name
    when '--exclude'
      patterns.push(Regexp.new(arg))
    when '--exclude-by-size'
      size = arg
    when '--exclude-by-glob'
      globs.push(arg)
    when '--help'
      usage
    when '--version'
      version
    end
  end
  matcher = Pdumpfs::FileMatcher.new(:size     => size,
                                     :globs    => globs,
                                     :patterns => patterns)

  usage if ARGV.length < 2

  # strip the trailing / to avoid basename(src) == ''.
  src  = ARGV[0].sub(%r!/+$!, "")
  dest = ARGV[1]
  base = ARGV[2]

  # Expand "@SpecialFolder" prefixes before the directory checks; checking
  # first would reject every such path because "@Desktop" itself is not a
  # directory.
  if windows?
    src  = expand_special_folders(src)
    dest = expand_special_folders(dest)
  end

  nodir(src)  unless File.directory?(src)
  nodir(dest) unless File.directory?(dest)

  return src, dest, base, matcher
end

# Builds the relative snapshot directory name "YYYY/MM/DD" for +date+
# (month and day are zero-padded to two digits).
def datedir(date)
  "%d/%02d/%02d" % [date.year, date.month, date.day]
end

# Finds the most recent earlier snapshot of +base+ below +dest+.
#
# Looks for <dest>/<YYYY/MM/DD>/<base>, starting with yesterday and going
# back at most 31 days; today's snapshot is never considered.  +src+ is
# accepted but not used.
#
# Returns the path of the first such directory found, or nil.
def latest_snapshot(src, dest, base)
  1.upto(31) do |days_back|   # allow at most 31 days absence
    candidate = File.join(dest, datedir(Date.today - days_back), base)
    return candidate if File.directory?(candidate)
  end
  nil
end

# Incomplete substitute for `cp -p`: copies +src+ to +dest+ and then
# carries over the source's access time, modification time and mode.
# Ownership is not handled here.
def copy(src, dest)
  source_stat = File.stat(src)
  File.copy(src, dest)
  if windows?
    # On Windows the copy is first given mode 0200 before the timestamps
    # are applied.
    File.chmod(0200, dest)
  end
  File.utime(source_stat.atime, source_stat.mtime, dest)
  # not strictly necessary; just to make sure the mode matches
  File.chmod(source_stat.mode, dest)
end

# Brings one entry of today's snapshot up to date and prints a line with
# the action taken and the source path.
#
# s - path of the source entry
# l - path of the same entry in the latest (previous) snapshot
# t - path of the entry to create in today's snapshot
#
# Actions, chosen by the type of +s+ itself (symbolic links are not
# followed when the type is determined):
#   directory - create the directory +t+
#   unchanged - +l+ is a regular file with the same size and mtime: hard
#               link +t+ to +l+
#   updated   - +l+ is a regular file but differs: copy +s+ to +t+
#   new file  - there is no regular file at +l+: copy +s+ to +t+
#   symlink   - recreate the link at +t+ with the same target
#   unsupported - anything else (block device, fifo, socket, ...) is only
#               reported; nothing is created
#
# When running as root, the owner and group of +s+ are applied to +t+
# (for symbolic links only if File.lchown is available).
#
# Errors raised by the file operations propagate to the caller.
def update_file(s, l, t)
  type = "unsupported"

  # Dispatch on the source's own type first, so that a symlink or special
  # file that replaced a previously regular file is never run through copy.
  case File.ftype(s)
  when "directory"
    type = "directory"
    File.mkpath(t)
  when "file"
    if !File.symlink?(l) && File.file?(l)
      if same_file?(s, l)
        type = "unchanged"
        File.link(l, t)
      else
        type = "updated"
        copy(s, t)
      end
    else
      type = "new file"
      copy(s, t)
    end
  when "link"
    type = "symlink"
    File.symlink(File.readlink(s), t)
  end
  # Any other type keeps the "unsupported" label and falls through to the
  # report below without creating anything.

  if Process.uid == 0 && type != "unsupported"
    if type == "symlink"
      if File.respond_to?(:lchown)
        stat = File.lstat(s)
        File.lchown(stat.uid, stat.gid, t)
      end
    else
      stat = File.stat(s)
      File.chown(stat.uid, stat.gid, t)
    end
  end
  printf "%-10s %s\n", type, s
end

# Applies saved timestamps and modes to directories.
#
# dirs - Hash mapping a directory path to the File::Stat whose atime,
#        mtime and mode should be applied to that path.
def restore_dir_attributes(dirs)
  dirs.each do |path, saved|
    File.utime(saved.atime, saved.mtime, path)
    File.chmod(saved.mode, path)
  end
end

# printf-style error reporting: writes the formatted message to STDERR,
# prefixed with "pdumpfs: " and followed by a newline.
def eprintf(format, *args)
  template = "pdumpfs: " + format + "\n"
  STDERR.printf(template, *args)
end

# Builds today's snapshot incrementally against the latest one.
#
# src     - source directory to walk
# latest  - directory of the latest (previous) snapshot
# today   - directory of today's snapshot
# matcher - object answering exclude?(path); excluded directories are
#           pruned from the walk, excluded files are skipped
#
# Every remaining entry is handed to update_file.  Errors are reported
# with eprintf and do not stop the walk; an entry that raised
# Errno::ENOENT is dropped from further processing.  Directory timestamps
# and modes are applied at the end, after all entries were written.
def update_snapshot(src, latest, today, matcher)
  pending_dirs = {}
  prefix = %r[^#{Regexp.quote src}/?]

  Find.find(src) do |source_path|
    if matcher.exclude?(source_path)
      Find.prune if File.lstat(source_path).directory?
      next
    end

    relative    = source_path.sub(prefix, "")   # path relative to src
    latest_path = File.join(latest, relative)   # entry in the latest snapshot
    today_path  = File.join(today, relative)    # entry in today's snapshot

    begin
      update_file(source_path, latest_path, today_path)
    rescue Errno::ENOENT => error
      eprintf("%s", error)
      next
    rescue => error
      eprintf("%s", error)
    end

    # Remember directories; their attributes are applied once the walk is done.
    if File.ftype(source_path) == "directory"
      pending_dirs[today_path] = File.stat(source_path)
    end
  end

  restore_dir_attributes(pending_dirs)
end

# Incomplete substitute for `cp -rp`: copies the tree below +src+ to
# +dest+.
#
# src     - source directory to walk
# dest    - directory receiving the copy
# matcher - object answering exclude?(path); excluded directories are
#           pruned from the walk, excluded files are skipped
#
# Directories are created, regular files are copied with copy, symbolic
# links are recreated with the same target, and every other type is
# skipped.  When running as root, owner and group are carried over (for
# symbolic links only if File.lchown is available).  Errors are reported
# with eprintf and do not stop the walk.  Directory timestamps and modes
# are applied at the end.
def recursive_copy(src, dest, matcher)
  pending_dirs = {}
  prefix = %r[^#{Regexp.quote(src)}/?]

  Find.find(src) do |source_path|
    if matcher.exclude?(source_path)
      Find.prune if File.lstat(source_path).directory?
      next
    end

    target = File.join(dest, source_path.sub(prefix, ""))

    begin
      kind = File.ftype(source_path)
      if kind == "directory"
        File.mkpath(target)
      elsif kind == "file"
        copy(source_path, target)
      elsif kind == "link"
        File.symlink(File.readlink(source_path), target)
      else
        # skip block device, fifo, socket, etc...
        next
      end

      if Process.uid == 0
        if File.ftype(source_path) == "link"
          if File.respond_to? 'lchown'
            owner = File.lstat(source_path)
            File.lchown(owner.uid, owner.gid, target)
          end
        else
          owner = File.stat(source_path)
          File.chown(owner.uid, owner.gid, target)
        end
      end
    rescue Errno::ENOENT => error
      eprintf("%s", error)
      next
    rescue => error
      eprintf("%s", error)
    end

    # Remember directories; their attributes are applied once the walk is done.
    if File.ftype(source_path) == "directory"
      pending_dirs[target] = File.stat(source_path)
    end
  end

  restore_dir_attributes(pending_dirs)
end

# Program entry point: parses the command line, locates the latest earlier
# snapshot and writes today's snapshot.
#
# The snapshot basename defaults to the basename of the source directory.
# On Windows the destination must be on an NTFS volume; otherwise an error
# is written to STDERR and the process exits with status 1.  The umask is
# set to 0077 before anything is created.  When an earlier snapshot exists
# today's snapshot is built incrementally against it, otherwise the source
# is copied in full.
def main
  src, dest, base, matcher = parse_options
  base ||= File.basename(src)

  if windows?
    fstype = get_filesystem_type(dest)

    if fstype !~ /ntfs/i
      # IO has no fprintf method; report through the shared eprintf helper.
      eprintf("only NTFS is supported but %s is %s.", dest, fstype)
      exit 1
    end
  end

  latest = latest_snapshot(src, dest, base)
  today  = File.join(dest, datedir(Date.today), base)

  File.umask(0077)
  File.mkpath(today)
  if latest
    update_snapshot(src, latest, today, matcher)
  else
    recursive_copy(src, today, matcher)
  end
end

# Run the backup only when this file is executed directly as a script,
# not when it is loaded from another Ruby program.
main if __FILE__ == $0
