#!/usr/bin/python2
# vim:fileencoding=utf-8 tabstop=4 :
'''access MacOS Resources in bare or AppleDouble files.

Name: macosresources.py

By: Tony Nelson  on  12 Jan 2015

get_applesingle_data() loads AppleSingle/AppleDouble files into memory,
with or without Data or Resource data.

load_resources() loads Resource files from data files, from AppleSingle/
AppleDouble data, and, on MOSX, from /rsrc files.

find_resources() finds the resources from a base path.

ditto_order() prints a list of the files and Resource files in the order
needed by the Next-Apple's rather shoddy ditto program when expanding a
.zip file.

When run as a script, examines the file and prints what AppleSingle/
AppleDouble and Resource data it finds, or, if the first argument is
"--ditto", prints the file or directory in ditto-order.

Writing resource files or AppleSingle/AppleDouble files is not
implemented, as I see little need for it, compared even to the need to
read such old files.

Ref _AppleSingle/AppleDouble Formats: Developer's Note_
    <http://support.apple.com/kb/TA32537>
Ref _Inside Macintosh Volumes I, II, and III_, I-128.

'''

__author__  = "Tony Nelson"
__email__   = "tonynelson@georgeanelson.com"
__url__     = "http://georgeanelson.com/macos_resources.htm"
__copyright__ = "Copyright 2015 by George A. Nelson.  All rights reserved."
__license__ = "Python"
__date__    = "12 Jan 2015"
__version__ = "1.0"

__all__ = [ 'is_applesingle_data', 'is_applesingle_file', 'get_applesingle_data', 'applesingle_names',
            'is_resource_data', 'is_resource_file', 'load_resources', 'get_resource', 'get_named_resource',
            'find_resources', 'ditto_order' ]

import struct, os, sys

if not hasattr(struct, "unpack_from"):
    # This struct module has no unpack_from(); install an equivalent that
    # slices out exactly the bytes the format needs and unpacks those.
    def _unpack_from(fmt, buf, offset=0):
        '''Unpack fmt from buf, starting at byte offset.
        '''
        stop = offset + struct.calcsize(fmt)
        return struct.unpack(fmt, buf[offset:stop])
    struct.unpack_from = _unpack_from


'''AppleSingle/AppleDouble file:

Table 2-1: AppleSingle file header

Field Length
----- ------
Magic number ------- 4 bytes
Version number ------ 4 bytes
Filler ------------- 16 bytes
Number of entries ----- 2 bytes
[list of entries]

Entry descriptor for each entry:
Entry ID ------ 4 bytes
Offset -------- 4 bytes
Length -------- 4 bytes

Apple has defined a set of entry IDs and their values as follows:

Data Fork -------- 1 Data fork
Resource Fork ----- 2 Resource fork
Real Name -------- 3 File's name as created on home file system
Comment --------- 4 Standard Macintosh comment
Icon, B&W -------- 5 Standard Macintosh black and white icon
Icon, Color -------- 6 Macintosh color icon
File Dates Info ------8 File creation date, modification date, and so on
Finder Info -------- 9 Standard Macintosh Finder information (16 bytes, w/extended finder information if 32 bytes)
Macintosh File Info ---10 Macintosh file information, attributes, and so on
ProDOS File Info -----11 ProDOS file information, attributes, and so on
MS-DOS File Info ----12 MS-DOS file information, attributes, and so on
Short Name --------13 AFP short name
AFP File Info ------- 14 AFP file information, attributes, and so on
Directory ID --------15 AFP directory ID
'''
# Header magic numbers: AppleSingle and AppleDouble differ only in the magic.
AS_MAGIC = 0x00051600
AD_MAGIC = 0x00051607
# Accepted format version numbers.
AD_VER = 0x00020000
AD_VER1 = 0x00010000
# struct formats, big-endian.
AD_HDR = '>LL16xH'          # Magic, Version, filler, #entries
AD_ENT = '>lLL'             # Entry ID, Offset (from BOF), Length
AD_HDR_SZ = struct.calcsize(AD_HDR)
AD_ENT_SZ = struct.calcsize(AD_ENT)
#AppleSingle = collections.namedtuple('AppleSingle', 'double data resource name comment iconbw icon obsinfo dates fndrinfo mac prodos msdos afpname afpinfo afpid')

# Key names for entry IDs 1..15, in entry-ID order.
applesingle_names = ( 'data', 'resource', 'name', 'comment', 'iconbw', 'icon', 'obsinfo',
             'dates', 'fndrinfo', 'mac', 'prodos', 'msdos', 'afpname', 'afpinfo', 'afpid' )
# Entry ID -> key name.  Built with zip() rather than a list comprehension so
# that no loop variables are left behind in the module namespace on Python 2.
_as_id2name = dict(zip(range(1, len(applesingle_names) + 1), applesingle_names))

def is_applesingle_data(data):
    '''Check the start of data for an AppleSingle/AppleDouble header.

       Returns a tuple (is AppleSingle or AppleDouble, is AppleDouble,
       number of entries).  When data is no longer than the header, or the
       magic or version is not recognized, the result is (False, False, 0).
    '''
    no_match = (False, False, 0)
    if len(data) <= AD_HDR_SZ:
        return no_match
    magic, version, nentries = struct.unpack_from(AD_HDR, data)
    if magic not in (AS_MAGIC, AD_MAGIC):
        return no_match
    if version not in (AD_VER, AD_VER1):
        return no_match
    return (True, magic == AD_MAGIC, nentries)

def is_applesingle_file(f):
    '''Check an open file for an AppleSingle/AppleDouble header.

       Rewinds f, reads one byte more than the header size, and returns
       whatever is_applesingle_data() reports for those bytes.
    '''
    f.seek(0)
    head = f.read(AD_HDR_SZ + 1)
    return is_applesingle_data(head)

def get_applesingle_data(f, get_resources=False, get_data=False):
    '''Extract the entries of an AppleSingle/AppleDouble file into a dict.

       f is a seekable file object open for reading.  The Data fork (entry
       ID 1) is loaded only if get_data is true, and the Resource fork (entry
       ID 2) only if get_resources is true; every other entry is always
       loaded.

       Returns a dict keyed by the names in applesingle_names.  Each value is
       the entry's contents as a byte string, or just its length if it was
       not loaded.  Entries the file does not contain have no key, so use
       .get() to obtain None for them.  Key "isad" is True if AppleDouble.

       Raises TypeError if the file has no AppleSingle/AppleDouble header, or
       ValueError if an entry descriptor or an entry's data is damaged.
    '''
    isas, isad, nad = is_applesingle_file(f)
    if not isas:
        raise TypeError("Not AppleSingle/AppleDouble")
    fields = {'isad': isad}
    for i in range(nad):
        # Entry descriptors are packed back to back right after the header.
        f.seek(AD_HDR_SZ + i * AD_ENT_SZ)
        ad = f.read(AD_ENT_SZ)
        if len(ad) != AD_ENT_SZ:
            # A short read would otherwise surface as struct.error, which
            # callers of this function are not told to expect.
            raise ValueError("AppleSingle/AppleDouble Entry %s descriptor truncated" % i)
        eid, eoff, elen = struct.unpack(AD_ENT, ad)
        if eid not in _as_id2name:
            raise ValueError("AppleSingle/AppleDouble Entry %s ID %s out of range" % (i, eid))
        name = _as_id2name[eid]
        skip = (eid == 1 and not get_data) or (eid == 2 and not get_resources)
        if skip:
            fields[name] = elen         # record only the size of an unloaded fork
            continue
        f.seek(eoff)
        v = f.read(elen)
        if len(v) != elen:
            raise ValueError("AppleSingle/AppleDouble Entry %s ID %s damaged" % (i, eid))
        fields[name] = v
    return fields


'''Resource fork:

From _Inside Macintosh Volumes I, II, and III_, I-128.

Resource File:
     16b    resource header
    112b    reserved (Apple)
    128b    app data
    ...     resource data...
            resource map

resource header:
      4b    resource data bof offset
      4b    resource map bof offset
      4b    resource data length
      4b    resource map length

resource data entry:
      4b    data length
    ...     data

resource map header:
     16b    0 (reserved for copy of resource header)
      4b    0 (reserved for handle to next map)
      2b    0 (reserved for file reference number)
      2b    Resource file attributes
      2b    type list offset in map
      2b    name list offset in map

type list:
      2b    #type entries - 1 (eww)
      8b    each entry:
        4b  type
        2b  #resources of this type - 1 (eww)
        2b  offset of this type's reference list, from start of type list

reference list entry (array per type):
      2b    ID
      2b    name list offset or -1 if no name
      1b    attributes
      3b    resource data entry data offset
      4b    0 (reserved for handle to resource)

name list entry:
      1b    length of name
      nb    name

'''
# struct formats for the Resource fork structures laid out above.  Every
# format is big-endian ('>'); 'x' pad bytes skip the reserved fields.
RES_FILE_HDR = '>LLLL'          # data offset, map offset, data length, map length
RES_DATA_ENT = '>L'             # length, followed by data
RES_MAP_HDR  = '>16x4x2xHHH'    # file attrs, type list offset in map, name list offset in map
RES_TYPE_HDR = '>h'             # #type entries - 1
RES_TYPE_ENT = '>4shH'          # type, #of type - 1, reference list type list map offset
# NOTE(review): the ID below is unpacked unsigned ('H'); Inside Macintosh
# appears to treat resource IDs as signed 16-bit values -- confirm before
# changing, since callers may look resources up by the unsigned value.
RES_REF_ENT  = '>HHL4x'         # ID, name list offset, 1b resource attrs + 3b data entry data offset
RES_NAME_ENT = '>B'             # name pascal string
class _Resource(object):
    '''One resource as unpacked by load_resources().

       data            the resource's data bytes (may be short if damaged)
       id              resource ID from the reference list entry
       name            resource name, or None if the resource has no name
       attrs           resource attribute byte
       size_from_file  data length as recorded in the file
    '''
    def __init__(self, data, id, name, attrs, size):
        self.data = data
        self.id = id
        self.name = name
        self.attrs = attrs
        self.size_from_file = size

    def __repr__(self):
        # Leave the data itself out: it can be large and is rarely readable.
        return "_Resource(id=%r, name=%r, attrs=%r, size=%r)" % (
            self.id, self.name, self.attrs, self.size_from_file)

def is_resource_data(d, sz):
    '''Guess whether d begins a Resource file of total size sz.

       Returns False if d is too short to examine (256 bytes or fewer), None
       if the header offsets and lengths are not plausible, or otherwise the
       header as a tuple (data offset, map offset, data length, map length).
    '''
    if len(d) <= 256:
        return False
    header = struct.unpack_from(RES_FILE_HDR, d)
    datao, mapo, datalen, maplen = header
    # The data must start at or after byte 256 and end at or before the map,
    # and the map must end within the file.
    layout_ok = datao >= 256 and datao+datalen <= mapo and mapo+maplen <= sz
    if not layout_ok:
        return None
    # The map must at least hold its own header plus the type-count field.
    smallest_map = struct.calcsize(RES_MAP_HDR) + struct.calcsize(RES_TYPE_HDR)
    if maplen < smallest_map:
        return None
    return header

def is_resource_file(f):
    '''Guess whether an open file is a Resource file.

       Measures the file by seeking to its end, then passes its first 257
       bytes and the measured size to is_resource_data() and returns that
       result.
    '''
    f.seek(0, 2)            # whence 2: relative to end of file
    total = f.tell()
    f.seek(0)
    head = f.read(257)
    return is_resource_data(head, total)

def load_resources(data):
    '''Unpack the contents of a MacOS Resource file.

       data is the whole Resource file (or Resource fork) as a byte string.

       Returns a tuple (resources, errs).  resources is a dict keyed by
       4-byte resource type; each value is a tuple (dict of resources by id,
       dict of resources by name) whose values are _Resource objects with
       attributes data, id, name, attrs and size_from_file.  errs is a list
       of messages describing damage that was found but worked around.

       Raises TypeError if data is not a Resource file, or ValueError if the
       map or data area cannot be located at all.
    '''
    def checked_slice(s, start, length, name):
        '''Return s[start:start+length], or raise ValueError if out of range.'''
        if start > len(s):
            raise ValueError("Resource %s start %d > len(%d)" % (name, start, len(s)))
        if start+length > len(s):
            raise ValueError("Resource %s length %d + start %d past len(%d)" % (name, length, start, len(s)))
        return s[start:start+length]

    def unpack_at(fmt, buf, offset):
        '''Return (fields, None), or (None, error text) if buf is too short.'''
        try:
            return struct.unpack_from(fmt, buf, offset), None
        except struct.error:
            # sys.exc_info() fetches the exception without binding syntax.
            return None, str(sys.exc_info()[1])

    fhdr = is_resource_data(data, len(data))
    if not fhdr:
        raise TypeError("Not Resource data")
    datao, mapo, datalen, maplen = fhdr
    rmap = checked_slice(data, mapo, maplen, "Map")
    data = checked_slice(data, datao, datalen, "Data")
    _map_attrs, otypelist, onamelist = struct.unpack_from(RES_MAP_HDR, rmap)

    fields, err = unpack_at(RES_TYPE_HDR, rmap, otypelist)
    if fields is None:
        raise ValueError("Resource Map: %s" % err)
    ntypes = fields[0] + 1                  # stored as count - 1

    type_ent_sz = struct.calcsize(RES_TYPE_ENT)
    ref_ent_sz = struct.calcsize(RES_REF_ENT)
    data_ent_sz = struct.calcsize(RES_DATA_ENT)

    resources = dict()
    errs = []
    otype = otypelist + struct.calcsize(RES_TYPE_HDR)
    for i in range(ntypes):
        fields, err = unpack_at(RES_TYPE_ENT, rmap, otype)
        if fields is None:
            errs.append("Resource Map Type entry: %s" % err)
            break
        restype, ntypem1, otypelistmap = fields
        resbyid = dict()
        resbyname = dict()
        # Reference list offsets are relative to the start of the type list.
        oref = otypelist + otypelistmap
        for j in range(ntypem1+1):
            fields, err = unpack_at(RES_REF_ENT, rmap, oref)
            if fields is None:
                errs.append("Resource Map Resource entry: %s" % err)
                break
            resid, onamelistname, attrsdataoffset = fields

            resname = None
            if onamelistname != 0xFFFF:     # 0xFFFF (-1) means "no name"
                oname = onamelist + onamelistname
                fields, err = unpack_at(RES_NAME_ENT, rmap, oname)
                if fields is None:
                    # Name length unreadable: report it and treat the
                    # resource as unnamed rather than using a stale length.
                    errs.append("Resource Map Resource Name: %s" % err)
                else:
                    resnamel = fields[0]
                    resname = rmap[oname+1:oname+resnamel+1]
                    if len(resname) != resnamel:
                        errs.append('Resource Map: %s %d name truncated (%d of %d bytes)' % (
                            restype, resid, len(resname), resnamel ))

            # High byte is the attributes, low 3 bytes the data entry offset.
            resattrs = attrsdataoffset>>24 & 0xFF
            oresdataentdata = attrsdataoffset & 0x00FFFFFF
            fields, err = unpack_at(RES_DATA_ENT, data, oresdataentdata)
            if fields is None:
                errs.append("Resource Map Resource data length: %s" % err)
                resdatalen = 0
                resdata = data[:0]          # empty string of the input's type
            else:
                resdatalen = fields[0]
                oresdataentdata += data_ent_sz
                resdata = data[oresdataentdata:oresdataentdata+resdatalen]
                if len(resdata) != resdatalen:
                    errs.append('Resource Map: %s %d "%s" wrong data size/offset %d @ %d' % (
                        restype, resid, resname, resdatalen, oresdataentdata ))

            resource = _Resource(resdata, resid, resname, resattrs, resdatalen)
            resbyid[resid] = resource
            if resname:
                resbyname[resname] = resource
            oref += ref_ent_sz
        resources[restype] = (resbyid, resbyname)
        otype += type_ent_sz
    return resources, errs

def get_resource(resources, restype, resid):
    '''Look up a resource by Type and ID in loaded resources.

       Returns the resource, or None if the Type or the ID is not present.
    '''
    try:
        by_id = resources[restype][0]
        found = by_id[resid]
    except KeyError:
        found = None
    return found

def get_named_resource(resources, restype, resname):
    '''Look up a resource by Type and Name in loaded resources.

       Returns the resource, or None if the Type or the Name is not present.
    '''
    try:
        by_name = resources[restype][1]
        found = by_name[resname]
    except KeyError:
        found = None
    return found


def find_resources(path):
    '''Try to find the resources that belong to the file at path.

       If the file itself is AppleSingle/AppleDouble or Resource data, or it
       already lives in a .AppleDouble directory, path is returned as is.
       Otherwise these locations are tried in order and the first that
       exists is returned:

         <dir>/.AppleDouble/<name>      .AppleDouble directory beside the file
         <dir>/._<name>                 "._" file beside the file
         __MACOSX/<dir>/._<name>        parallel __MACOSX directory tree
         <path>/..namedfork/rsrc        Resource fork, on MOSX

       Returns the found path, or None if the base file cannot be opened or
       no resources are found.
    '''
    try:
        f = open(path, 'rb')            # the headers are binary data
    except IOError:
        return None                     # base file not found
    try:
        isas = is_applesingle_file(f)[0]
        isres = not isas and is_resource_file(f)
    finally:
        f.close()                       # closed even if probing raises

    dirname, basename = os.path.split(path)
    if isas or isres or os.path.basename(dirname) == ".AppleDouble":
        return path

    candidates = (
        os.path.join(dirname, ".AppleDouble", basename),
        os.path.join(dirname, "._" + basename),
        os.path.join("__MACOSX", dirname, "._" + basename),
        path + "/..namedfork/rsrc",     # on MOSX?
    )
    for candidate in candidates:
        if os.path.exists(candidate):
            return candidate
    return None


def ditto_order(args):
    '''Print files in a directory tree in the order the rather crummy MOSX ditto
    needs.  Ditto is used by the shoddy MOSX GUI for archiving files.

    From the directory containing both the files to archive and the __MOCOSX
    directory, pipe the output to "zip -@ name" to make a .zip file that MOSX
    can use.

    Traverses the directory tree and print the files and corresponding
    AppleDouble files in the parallel __mac_os_x/ directory, one per line in
    pairs: main file, AppleDouble file.
    '''
    need__ = True

    def ppath(root, fn):
        path = os.path.join(root,fn)
        mpath = os.path.join("__MACOSX",root,"._"+fn)
        print path
        if os.path.exists(mpath):
            print mpath

    for inpath in args:
        if os.path.isdir(inpath):
            for root, dirs, files in os.walk(inpath):
                print root
                mpath = os.path.join("__MACOSX",root)
                if os.path.exists(mpath):
                    if need__:
                        print "__MACOSX"
                        need__ = False
                    print mpath
                files.sort()
                for fn in files:
                    ppath(root, fn)
        elif os.path.isfile(inpath):
            ppath(*os.path.split(inpath))
        else:
            print >>sys.stderr, 'not found: "%s"' % inpath


def main():
    if len(sys.argv) < 2:
        print "Dump AppleSingle/AppleDouble parts and Resource headers.\n\nUsage: %s pathname" % sys.argv[0]
        print "  %s --ditto pathname\n" % sys.argv[0]
        sys.exit(1)

    if len(sys.argv) >= 3 and sys.argv[1] == "--ditto":
        ditto_order(sys.argv[2:])
        sys.exit(0)

    if len(sys.argv) >= 3 and sys.argv[1] == "--ditto_file":
        ditto_order([l[:-1] for l in open(sys.argv[2]).readlines()])
        sys.exit(0)

    path = find_resources(sys.argv[1])
    if not path:
        print "can't find resources"
        sys.exit(2)
    if path != sys.argv[1]:
        print 'found resources at "%s"' % path
    f = open(path)

    resdata = None
    try:
        asd = get_applesingle_data(f, get_resources=True)
    except TypeError, e:           # maybe just resource data?
        print e
    except ValueError, e:
        print e
    else:
        resdata = asd.get('resource')
        for i,n in enumerate(('isas',)+applesingle_names):
            v = asd.get(n)
            if v is not None:
                l = v
                try:
                    l = len(v)
                    v = v[:32]
                except:
                    pass
                print "%2d %8s %5d %r" % (i, n, l, v)
        print
    if resdata is None:
        f.seek(0)
        resdata = f.read()
    f.close()

    if not resdata:
        print "empty resource data"
        sys.exit(0)

    try:
        resources, errs = load_resources(resdata)
    except (TypeError, ValueError), e:
        print "can't load resources:", e
        sys.exit(1)

    for s in errs:
        print s
    for restype, (resbyid, resbyname) in resources.items():
        print restype
        print " ", [(v.id, v.name, len(v.data)) for v in resbyid.values()]
        print " ", [(v.name, v.id, len(v.data)) for v in resbyname.values()]

if __name__ == '__main__':
    main()