Diffstat (limited to 'src/vfs/extfs/helpers/s3+.in')
 -rw-r--r--  src/vfs/extfs/helpers/s3+.in | 490
 1 file changed, 490 insertions, 0 deletions
diff --git a/src/vfs/extfs/helpers/s3+.in b/src/vfs/extfs/helpers/s3+.in
new file mode 100644
index 0000000..2ab596a
--- /dev/null
+++ b/src/vfs/extfs/helpers/s3+.in
@@ -0,0 +1,490 @@
+#! @PYTHON@
+# -*- coding: utf-8 -*-
+
+#
+# Midnight Commander compatible EXTFS for accessing Amazon Web Services S3.
+# Written by Jakob Kemi <jakob.kemi@gmail.com> 2009
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+#
+# Notes:
+#  This EXTFS exposes buckets as directories and keys as files.
+#  Due to EXTFS limitations, all buckets & keys have to be read initially,
+#  which might take quite some time.
+#  Tested on Debian with Python 2.4-2.6 and boto 1.4c and 1.6b
+#  (Python 2.6 might need -W ignore::DeprecationWarning because boto uses
+#  the deprecated module Popen2).
+#
+#
+# Installation:
+#  Make sure that boto <http://code.google.com/p/boto> (python-boto in Debian)
+#  is installed. Preferably pytz (package python-tz in Debian) should be
+#  installed as well.
+#
+#  Save as an executable file /usr/libexec/mc/extfs/s3 (or wherever your mc
+#  expects to find extfs modules).
+#
+# Settings: (should be set via environment)
+#  Required:
+#   AWS_ACCESS_KEY_ID         : Amazon AWS access key (required)
+#   AWS_SECRET_ACCESS_KEY     : Amazon AWS secret access key (required)
+#  Optional:
+#   MCVFS_EXTFS_S3_LOCATION   : where to create new buckets: "EU" - default,
+#                               "USWest", "APNortheast" etc.
+#   MCVFS_EXTFS_S3_DEBUGFILE  : write debug info to this file (no info by default)
+#   MCVFS_EXTFS_S3_DEBUGLEVEL : debug message level ("WARNING" - default,
+#                               "DEBUG" - verbose)
+#
+#
+# Usage:
+#  Open the "Quick cd" dialog (<alt-c>) and type: s3:// <enter>
+#  (or simply type `cd s3://' on the shell line)
+#
+#
+# History:
+#
+# 2015-07-22 Dmitry Koterov <dmitry.koterov@gmail.com>
+#  - Support for non-ASCII characters in filenames (system encoding detection).
+#
+# 2015-05-21 Dmitry Koterov <dmitry.koterov@gmail.com>
+#  - Resolve the "Please use AWS4-HMAC-SHA256" error: enforce the new V4
+#    authentication method. It is required in many (if not all) locations nowadays.
+#  - Now s3+ works with buckets in different regions: locations are auto-detected.
+#  - Debug level specification support (MCVFS_EXTFS_S3_DEBUGLEVEL).
+#
+# 2009-02-07 Jakob Kemi <jakob.kemi@gmail.com>
+#  - Updated instructions.
+#  - Improved error reporting.
+#
+# 2009-02-06 Jakob Kemi <jakob.kemi@gmail.com>
+#  - Threaded list command.
+#  - Handle rm of empty "subdirectories" (as seen in mc).
+#  - List the most recent datetime and total size of keys as directory properties.
+#  - List modification time in local time.
+#
+# 2009-02-05 Jakob Kemi <jakob.kemi@gmail.com>
+#  - Initial version.
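+#
+# Example settings (a sketch only; the two key values below are the well-known
+# placeholders from the AWS documentation, not real credentials):
+#  export AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE
+#  export AWS_SECRET_ACCESS_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
+#  export MCVFS_EXTFS_S3_DEBUGFILE=/tmp/mc-s3.log
+#  export MCVFS_EXTFS_S3_DEBUGLEVEL=DEBUG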
+#
+
+import sys
+import os
+import time
+import re
+import datetime
+
+
+import boto
+from boto.s3.connection import S3Connection
+from boto.exception import BotoServerError
+
+
+# Get settings from environment
+USER = os.getenv('USER', '0')
+AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID')
+AWS_SECRET_ACCESS_KEY = os.getenv('AWS_SECRET_ACCESS_KEY')
+S3LOCATION = os.getenv('MCVFS_EXTFS_S3_LOCATION', 'EU')
+DEBUGFILE = os.getenv('MCVFS_EXTFS_S3_DEBUGFILE')
+DEBUGLEVEL = os.getenv('MCVFS_EXTFS_S3_DEBUGLEVEL', 'WARNING')
+
+if not AWS_ACCESS_KEY_ID or not AWS_SECRET_ACCESS_KEY:
+    sys.stderr.write('Missing AWS_ACCESS_KEY_ID or AWS_SECRET_ACCESS_KEY environment variables.\n')
+    sys.exit(1)
+
+# Set up logging
+if DEBUGFILE:
+    import logging
+    logging.basicConfig(
+        filename=DEBUGFILE,
+        level=logging.DEBUG,
+        format='%(asctime)s %(levelname)s %(message)s')
+    logging.getLogger('boto').setLevel(getattr(logging, DEBUGLEVEL))
+else:
+    # No debug file configured: replace the logging module with a no-op stub.
+    class Void(object):
+        def __getattr__(self, attr):
+            return self
+        def __call__(self, *args, **kw):
+            return self
+    logging = Void()
+
+logger = logging.getLogger('s3extfs')
+
+
+def __fix_io_encoding(last_resort_default='UTF-8'):
+    """
+    Needed to work with non-ASCII characters in filenames: try hard to
+    detect the system encoding and wrap stdin/stdout/stderr accordingly.
+    """
+    import codecs
+    import locale
+    for var in ('stdin', 'stdout', 'stderr'):
+        if getattr(sys, var).encoding is None:
+            enc = None
+            if enc is None:
+                try:
+                    enc = locale.getpreferredencoding()
+                except:
+                    pass
+            if enc is None:
+                try:
+                    enc = sys.getfilesystemencoding()
+                except:
+                    pass
+            if enc is None:
+                try:
+                    enc = sys.stdout.encoding
+                except:
+                    pass
+            if enc is None:
+                enc = last_resort_default
+            setattr(sys, var, codecs.getwriter(enc)(getattr(sys, var), 'strict'))
+__fix_io_encoding()
+
+
+def threadmap(fun, iterable, maxthreads=16):
+    """
+    Quick and dirty threaded version of the builtin map().
+    Propagates exceptions safely. Note that results are collected in
+    completion order, not input order.
+    """
+    from threading import Thread
+    import Queue
+
+    items = list(iterable)
+    nitems = len(items)
+    if nitems < 2:
+        return map(fun, items)
+
+    # Create and fill the input queue
+    input = Queue.Queue()
+    output = Queue.Queue()
+
+    for i, item in enumerate(items):
+        input.put((i, item))
+
+    class WorkThread(Thread):
+        """
+        Takes one item at a time from the input queue (the thread terminates
+        when the input queue is empty), applies fun and puts the result in
+        the output queue.
+        """
+        def run(self):
+            while True:
+                try:
+                    (i, item) = input.get_nowait()
+                    try:
+                        result = fun(item)
+                        output.put((i, result))
+                    except:
+                        output.put((None, sys.exc_info()))
+                except Queue.Empty:
+                    return
+
+    # Start worker threads
+    for i in range(min(len(items), maxthreads)):
+        t = WorkThread()
+        t.setDaemon(True)
+        t.start()
+
+    # Wait for all threads to finish & collate the results
+    ret = []
+    for i in range(nitems):
+        i, res = output.get()
+        if i is None:
+            # A worker failed: re-raise its exception with the original traceback.
+            raise res[0], res[1], res[2]
+        ret.append(res)
+
+    return ret
+
+logger.debug('started')
+
+# Normalize the configured location to a region name known to boto.
+if S3LOCATION.upper() == "EU":
+    S3LOCATION = "eu-central-1"
+if S3LOCATION.upper() == "US":
+    S3LOCATION = "us-east-1"
+for att in dir(boto.s3.connection.Location):
+    v = getattr(boto.s3.connection.Location, att)
+    if type(v) is str and att.lower() == S3LOCATION.lower():
+        S3LOCATION = v
+        break
+logger.debug('Using location %s for new buckets', S3LOCATION)
+
+
+def get_connection(location):
+    """
+    Creates a connection to the specified region.
+    """
+    os.environ['S3_USE_SIGV4'] = 'True'  # only the V4 method is supported in all locations
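+    # With SigV4, requests are signed for one specific region, so a connection
+    # made here works only against that region; get_bucket() below re-connects
+    # to a bucket's home region whenever S3 reports a region mismatch.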
+    return boto.s3.connect_to_region(
+        location,
+        aws_access_key_id=AWS_ACCESS_KEY_ID,
+        aws_secret_access_key=AWS_SECRET_ACCESS_KEY
+    )
+
+
+# Global S3 default connection.
+s3 = get_connection('us-east-1')
+
+
+def get_bucket(name):
+    """
+    Returns a bucket by its name, no matter what region it is in.
+    """
+    try:
+        b = s3.get_bucket(name, validate=False)
+        b.get_location()  # just to raise an exception on error
+        return b
+    except boto.exception.S3ResponseError, e:
+        # Seems this is the only proper way to switch to the bucket's region;
+        # unfortunately, requesting "?location" at the default region does not work.
+        m = re.search(r'<Region>(.*?)</Region>', e.body)
+        if m:
+            return get_connection(m.group(1)).get_bucket(name)
+        raise
+
+
+logger.debug('argv: ' + str(sys.argv))
+try:
+    cmd = sys.argv[1]
+    args = sys.argv[2:]
+except:
+    sys.stderr.write('This program should be called from within MC\n')
+    sys.exit(1)
+
+
+def handleServerError(msg):
+    e = sys.exc_info()
+    msg += ', reason: ' + e[1].reason
+    logger.error(msg, exc_info=e)
+    sys.stderr.write(msg + '\n')
+    sys.exit(1)
+
+#
+# List all S3 contents
+#
+if cmd == 'list':
+    if len(args) > 0:
+        path = args[0]
+    else:
+        path = ''
+
+    logger.info('list')
+
+    rs = s3.get_all_buckets()
+
+    # Import Python timezones (pytz)
+    try:
+        import pytz
+    except:
+        logger.warning('Missing pytz module, timestamps will be off')
+
+        # A fallback UTC tz stub
+        class pytzutc(datetime.tzinfo):
+            def __init__(self):
+                datetime.tzinfo.__init__(self)
+                self.utc = self
+                self.zone = 'UTC'
+            def utcoffset(self, dt):
+                return datetime.timedelta(0)
+            def tzname(self, dt):
+                return "UTC"
+            def dst(self, dt):
+                return datetime.timedelta(0)
+        pytz = pytzutc()
+
+    # Find the timezone
+    # (yes, timeZONE as in _geographic zone_, not EST/CEST or whatever we get
+    # from time.tzname)
+    # http://regebro.wordpress.com/2008/05/10/python-and-time-zones-part-2-the-beast-returns/
+    def getGuessedTimezone():
+        # 1. check the TZ environment variable
+        try:
+            tz = os.getenv('TZ', '')
+            return pytz.timezone(tz)
+        except:
+            pass
+        # 2. check if /etc/timezone exists (Debian at least)
+        try:
+            if os.path.isfile('/etc/timezone'):
+                tz = open('/etc/timezone', 'r').readline().strip()
+                return pytz.timezone(tz)
+        except:
+            pass
+        # 3. check if /etc/localtime is a _link_ to something useful
+        try:
+            if os.path.islink('/etc/localtime'):
+                link = os.readlink('/etc/localtime')
+                tz = '/'.join(link.split(os.path.sep)[-2:])
+                return pytz.timezone(tz)
+        except:
+            pass
+        # 4. use time.tzname, which will probably be wrong by an hour 50% of the time
+        try:
+            return pytz.timezone(time.tzname[0])
+        except:
+            pass
+        # 5. fall back to plain UTC
+        return pytz.utc
+
+    tz = getGuessedTimezone()
+    logger.debug('Using timezone: ' + tz.zone)
+
+    # AWS time is in the format: 2009-01-07T16:43:39.000Z
+    # we "want" MM-DD-YYYY hh:mm (in localtime)
+    expr = re.compile(r'^(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})\.\d{3}Z$')
+    def convDate(awsdatetime):
+        m = expr.match(awsdatetime)
+        ye, mo, da, ho, mi, se = map(int, m.groups())
+
+        dt = datetime.datetime(ye, mo, da, ho, mi, se, tzinfo=pytz.utc)
+        return dt.astimezone(tz).strftime('%m-%d-%Y %H:%M')
+
+
+    def bucketList(b):
+        b = get_bucket(b.name)  # get the bucket at its own region
+        totsz = 0
+        mostrecent = '1970-01-01T00:00:00.000Z'
+        ret = []
+        for k in b.list():
+            if k.name.endswith('/'):
+                # Sometimes S3 keys are created that end with "/". Extfs cannot
+                # work with them as with files, and such keys may hide same-name
+                # directories, so we skip them.
+                continue
+            mostrecent = max(mostrecent, k.last_modified)
+            mtime = convDate(k.last_modified)
+            ret.append('%10s %3d %-8s %-8s %d %s %s\n' % (
+                '-rw-r--r--', 1, USER, USER, k.size, mtime, b.name + '/' + k.name)
+            )
+            totsz += k.size
+
+        mtime = convDate(mostrecent)
+        sys.stdout.write('%10s %3d %-8s %-8s %d %s %s\n' % (
+            'drwxr-xr-x', 1, USER, USER, totsz, mtime, b.name)
+        )
+        for line in ret:
+            sys.stdout.write(line)
+
+    threadmap(bucketList, rs)
+
+#
+# Fetch a file from S3
+#
+elif cmd == 'copyout':
+    archivename = args[0]
+    storedfilename = args[1]
+    extractto = args[2]
+
+    bucket, key = storedfilename.split('/', 1)
+    logger.info('copyout bucket: %s, key: %s' % (bucket, key))
+
+    try:
+        b = get_bucket(bucket)
+        k = b.get_key(key)
+
+        out = open(extractto, 'w')
+
+        k.open(mode='r')
+        for buf in k:
+            out.write(buf)
+        k.close()
+        out.close()
+    except BotoServerError:
+        handleServerError('Unable to fetch key "%s"' % (key))
+
+#
+# Upload a file to S3
+#
+elif cmd == 'copyin':
+    archivename = args[0]
+    storedfilename = args[1]
+    sourcefile = args[2]
+
+    bucket, key = storedfilename.split('/', 1)
+    logger.info('copyin bucket: %s, key: %s' % (bucket, key))
+
+    try:
+        b = get_bucket(bucket)
+        k = b.new_key(key)
+        k.set_contents_from_file(fp=open(sourcefile, 'r'))
+    except BotoServerError:
+        handleServerError('Unable to upload key "%s"' % (key))
+
+#
+# Remove a file from S3
+#
+elif cmd == 'rm':
+    archivename = args[0]
+    storedfilename = args[1]
+
+    bucket, key = storedfilename.split('/', 1)
+    logger.info('rm bucket: %s, key: %s' % (bucket, key))
+
+    try:
+        b = get_bucket(bucket)
+        b.delete_key(key)
+    except BotoServerError:
+        handleServerError('Unable to remove key "%s"' % (key))
+
+#
+# Create a directory
+#
+elif cmd == 'mkdir':
+    archivename = args[0]
+    dirname = args[1]
+
+    logger.info('mkdir dir: %s' % (dirname))
+    if '/' in dirname:
+        # Only top-level directories (buckets) can be created;
+        # "subdirectories" exist merely as key prefixes.
+        logger.warning('skipping mkdir')
+    else:
+        bucket = dirname
+        try:
+            get_connection(S3LOCATION).create_bucket(bucket, location=S3LOCATION)
+        except BotoServerError:
+            handleServerError('Unable to create bucket "%s"' % (bucket))
+
+#
+# Remove a directory
+#
+elif cmd == 'rmdir':
+    archivename = args[0]
+    dirname = args[1]
+
+    logger.info('rmdir dir: %s' % (dirname))
+    if '/' in dirname:
+        # Removing an empty "subdirectory" requires no S3 operation.
+        logger.warning('skipping rmdir')
+    else:
+        bucket = dirname
+        try:
+            b = get_bucket(bucket)
+            b.connection.delete_bucket(b)
+        except BotoServerError:
+            handleServerError('Unable to delete bucket "%s"' % (bucket))
+
+#
+# Run from S3
+#
+elif cmd == 'run':
+    archivename = args[0]
+    storedfilename = args[1]
+    arguments = args[2:]
+
+    bucket, key = storedfilename.split('/', 1)
+    logger.info('run bucket: %s, key: %s' % (bucket, key))
+
+    os.execv(storedfilename, arguments)
+else:
+    logger.error('unhandled, bye')
+    sys.exit(1)
+
+logger.debug('command handled')
+sys.exit(0)
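For reference, the `list` command prints one drwxr-xr-x line per bucket (whose
size is the total of its keys and whose time is the most recent key) followed
by one -rw-r--r-- line per key, with timestamps formatted as '%m-%d-%Y %H:%M'
in local time. A hypothetical run (bucket name, user and sizes are made up for
illustration) would look like:

    drwxr-xr-x   1 jdoe     jdoe     10123 05-21-2015 14:02 mybucket
    -rw-r--r--   1 jdoe     jdoe     10123 05-21-2015 14:02 mybucket/notes.txt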