Add tagger.py
author Colin McCabe <cmccabe@alumni.cmu.edu>
Sun, 13 Dec 2009 23:49:51 +0000 (15:49 -0800)
committer Colin McCabe <cmccabe@alumni.cmu.edu>
Sun, 13 Dec 2009 23:49:51 +0000 (15:49 -0800)
id3v2_wrapper.sh [new file with mode: 0755]
tagger.py [new file with mode: 0755]

diff --git a/id3v2_wrapper.sh b/id3v2_wrapper.sh
new file mode 100755 (executable)
index 0000000..ac7321c
--- /dev/null
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+# Run id3v2 without babbling to stdout about stripped tags: all arguments are
+# passed through to id3v2, and its "Stripping id3 tag in ... stripped." lines
+# are filtered out of the output.
+# NOTE(review): the exit status callers see is presumably grep's (the last
+# command in the pipeline), not id3v2's -- confirm before relying on it.
+id3v2 "$@" | grep -v '^Stripping id3 tag in .*\.\.\.id3v[12] stripped.'
+
diff --git a/tagger.py b/tagger.py
new file mode 100755 (executable)
index 0000000..bda52af
--- /dev/null
+++ b/tagger.py
@@ -0,0 +1,331 @@
+#!/usr/bin/python
+
+#
+# Changes mp3 ID3 tags to match the file names.
+#
+# I like to store my mp3s in a file structure like this:
+#
+# Artist Name - Album Title = Conductor [Encoding]/01 - Track 1.mp3
+# Artist Name - Album Title = Conductor [Encoding]/02 - Track 2.mp3
+# ...
+#
+# This script runs through an entire directory of mp3s, and changes all the
+# ID3 tags to match the file names.
+#
+# Philosophical aside: I guess you could argue that this defeats the point of
+# ID3 tags, since under this system, all the information is stored in the file
+# name. This is true; however, I need to play my music on a lot of different
+# systems (like mp3 players) which don't use my file naming scheme.
+#
+# I have had bad experiences with ID3 tags in the past. Every program seems
+# to generate and parse them a little bit differently. The ID3 standard
+# doesn't even specify whether to use unicode vs. Latin-1, let alone what you
+# should do if a file has conflicting ID3v1 and ID3v2 tags.
+#
+# It's just easier to use a filing system that actually works well -- the Linux
+# filesystem -- and regard ID3 tags as something ephemeral that's generated
+# out of the "real" file information.
+#
+# Colin McCabe
+# 2008/12/7
+#
+
+import getopt
+import os
+import re
+import stat
+import string
+import subprocess
+import sys
+
+# GLOBALS
+
+# script arguments; these defaults are overridden by the option-parsing loop
+# further down in this file
+dry_run = False     # -d: my_system() does not run the external command
+verbose = False     # -v: my_system() prints each command; extra progress output
+self_test = False   # -s: run run_self_test() and exit instead of tagging
+
+# globals
+# Number of mp3 files processed without a MusicFileErr. Despite the name, it
+# is incremented once per file, not once per album.
+total_albums = 0
+# Path of id3v2_wrapper.sh; the real value is assigned from
+# find_companion_script() before any tagging happens.
+id3v2_wrapper = ""
+
+# Locates the executable companion script named 'target' that lives in the
+# same directory as this script.
+#
+# target: file name of the companion script (e.g. 'id3v2_wrapper.sh').
+# Returns the absolute path of the companion script.
+# If the script cannot be stat()ed, or its owner-execute bit is not set,
+# prints an error message naming 'target' and exits with status 1.
+def find_companion_script(target):
+    # Resolve our own directory to an absolute path first. When invoked as
+    # "python tagger.py", dirname(sys.argv[0]) is empty, and plain string
+    # concatenation would then look for "/<target>" in the filesystem root.
+    mydir = os.path.dirname(os.path.abspath(sys.argv[0]))
+    target_path = os.path.join(mydir, target)
+    try:
+        mode = os.stat(target_path).st_mode
+    except OSError, e:
+        print "ERROR: can't find " + target + ": " + str(e)
+        sys.exit(1)
+    if not (mode & stat.S_IEXEC):
+        print "ERROR: " + target + " is not executable"
+        sys.exit(1)
+    return target_path
+
+# Regular expressions for parsing file names--
+# which is, after all, what this program is all about
+
+# Any path that ends in ".mp3".
+music_file_re = re.compile(r".*\.mp3$")
+
+# <anything>/<dir_name>/<two digits> - <track_name>.<extension>
+music_file_name_re = re.compile(
+            r".*/"
+            r"(?P<dir_name>[^/]*)/"
+            r"(?P<track_number>[0123456789][0123456789]) - "
+            r"(?P<track_name>[^/]*)"
+            r"\.[a-zA-Z0123456789]*$")
+
+# [<path>/]<artist> - <album>, optionally followed by " = <conductor>" and
+# then optionally by a literal "[LL]" marker. The 'conductor' group, when
+# present, still includes its leading " = ".
+dir_name_re = re.compile(
+            r"(.*/)?"
+            r"(?P<artist>[0-9A-Za-z _.\-]*?) - "
+            r"(?P<album>[0-9A-Za-z _(),'.\-\+]*)"
+            r"(?P<conductor> = [0-9A-Za-z _'.\-]*)?"
+            r"(?P<encoding>\[LL\])?$")
+
+# Compares the fields of the parsed music file 'm' against the expected
+# values and prints a "FAILED: ..." report for every field that differs.
+#
+# m: object exposing album.artist, album.name, album.conductor,
+#    track_number and title (a MusicFile).
+# artist, album_name, conductor, title: expected strings.
+# track_number: expected track number, as an int.
+# Returns the number of fields that did not match (0 means all matched).
+def self_test_music_file(m, artist, album_name,
+                        conductor, track_number, title):
+    checks = (
+        ("artist", m.album.artist, artist),
+        ("album_name", m.album.name, album_name),
+        ("conductor", m.album.conductor, conductor),
+        ("track_number", m.track_number, track_number),
+        ("title", m.title, title),
+    )
+    failures = 0
+    for field, actual, expected in checks:
+        if (actual != expected):
+            # str() both sides: track numbers are ints, and concatenating an
+            # int to a string raises TypeError instead of printing a report.
+            print "FAILED: " + field + ": \"" + str(actual) + "\""
+            print "\tshould be: \"" + str(expected) + "\""
+            failures += 1
+    return failures
+
+# Parses a few representative paths with MusicFile.from_filename() and hands
+# each result, together with the expected field values, to
+# self_test_music_file(), which reports any mismatch.
+def run_self_test():
+    # Each entry: (path to parse, expected fields as keyword arguments).
+    cases = [
+        ("./Mozart - Symphony No 26 in Eb Maj - K161a"
+         " = The Academy of Ancient Music/01 - Adagio.mp3",
+         dict(artist="Mozart",
+              album_name="Symphony No 26 in Eb Maj - K161a",
+              conductor="The Academy of Ancient Music",
+              track_number=1,
+              title="Adagio")),
+
+        ("./Tchaikovsky - The Sleeping Beauty - Op. 66"
+         " = Sir Charles Mackerras/02 - Scene.mp3",
+         dict(artist="Tchaikovsky",
+              album_name="The Sleeping Beauty - Op. 66",
+              conductor="Sir Charles Mackerras",
+              track_number=2,
+              title="Scene")),
+
+        # TODO: move John Cage into Comment or secondary author field here.
+        ("./Various - American Classics"
+         "/12 - John Cage - Prelude for Meditation.mp3",
+         dict(artist="Various",
+              album_name="American Classics",
+              conductor="",
+              track_number=12,
+              title="John Cage - Prelude for Meditation")),
+    ]
+    for path, expected in cases:
+        parsed = MusicFile.from_filename(path)
+        self_test_music_file(parsed, **expected)
+
+
+# Given a hash H, creates a hash which is the inverse
+# i.e. if H[k] = v, H'[v] = k
+#
+# h: dict whose values are hashable.
+# Returns a new dict; 'h' itself is not modified. If several keys share a
+# value, the key visited last in h's iteration order is the one kept.
+def reverse_hash(h):
+    # Let dict() consume the swapped pairs directly instead of driving the
+    # iterator by hand with next()/StopIteration.
+    return dict((v, k) for k, v in h.items())
+
+# Runs an external command (unless in dry-run mode) and reports failures.
+#
+# ignore_ret: if true, a non-zero exit status is not reported as an error.
+# cmd: the program to run followed by its arguments, one string each; the
+#      command is executed directly, not through a shell.
+# Nothing is returned; problems are printed as "ERROR: ..." lines.
+def my_system(ignore_ret, *cmd):
+    if verbose:
+        print cmd
+    if dry_run:
+        return
+    # The child gets a minimal environment. PATH falls back to os.defpath,
+    # because a None value in the environment makes subprocess raise a
+    # TypeError that the OSError handler below would not catch.
+    # NOTE(review): MALLOC_CHECK_=0 presumably relaxes glibc heap checking
+    # for id3v2 -- confirm it is still needed.
+    my_env = {"MALLOC_CHECK_" : "0",
+              "PATH" : os.environ.get("PATH", os.defpath)}
+    try:
+        retcode = subprocess.call(cmd, env=my_env, shell=False)
+    except OSError, e:
+        print "ERROR: Execution failed:", e
+        return
+    if (retcode < 0):
+        # A negative return code means the child was killed by a signal.
+        print "ERROR: Child was terminated by signal", -retcode
+    elif ((not ignore_ret) and (retcode != 0)):
+        print "ERROR: Child returned", retcode
+
+# CLASSES
+# Simple record holding an encoding value.
+# NOTE(review): nothing visible in this diff instantiates FileType -- confirm
+# whether it is still needed.
+class FileType(object):
+    # encoding: stored unchanged as self.encoding.
+    def __init__(self, encoding):
+        self.encoding = encoding
+
+# An album, as described by the name of the directory holding its tracks:
+# "<artist> - <album name>", optionally followed by " = <conductor>" and by
+# an "[LL]" encoding marker.
+#
+# Attributes (all strings; the optional ones are "" when absent):
+#   artist, name, conductor, encoding
+class Album(object):
+    # artist, name: required strings; MusicFileErr is raised if either is
+    #               None. Trailing whitespace is removed.
+    # conductor: optional text as captured by dir_name_re, i.e. normally
+    #            still carrying its leading " = " separator.
+    # encoding: optional encoding marker such as "[LL]".
+    def __init__(self, artist, name, conductor, encoding):
+        if artist is None:
+            raise MusicFileErr("can't have Album.artist = None")
+        if name is None:
+            raise MusicFileErr("can't have Album.name = None")
+        self.artist = artist.rstrip()
+        self.name = name.rstrip()
+        if conductor:
+            # Keep what follows the " = " separator; if there is no
+            # separator, keep the whole text rather than chopping it.
+            head, sep, tail = conductor.partition(' = ')
+            # Strip as well: when an "[LL]" marker follows, the captured
+            # conductor text ends with the space preceding the marker.
+            self.conductor = (tail if sep else conductor).strip()
+        else:
+            self.conductor = ""
+        self.encoding = encoding.rstrip() if encoding else ""
+
+    # Builds an Album from a directory name (with or without leading path).
+    # Raises MusicFileErr if the name does not match dir_name_re.
+    @staticmethod
+    def from_dirname(dirname):
+        match = dir_name_re.match(dirname)
+        if (not match):
+            raise MusicFileErr("can't parse directory name \"" +
+                                dirname + "\"")
+        return Album(match.group('artist'), match.group('album'),
+                     match.group('conductor'), match.group("encoding"))
+
+    # Returns a one-line description: "artist - name", followed by the
+    # conductor and the encoding (space separated) when they are set.
+    def to_s(self):
+        ret = self.artist + " - " + self.name
+        # conductor/encoding are "" rather than None when absent, so test
+        # truthiness to avoid appending dangling spaces.
+        if self.conductor:
+            ret += " " + self.conductor
+        if self.encoding:
+            ret += " " + self.encoding
+        return ret
+
+# Raised when a music file name or album directory name cannot be parsed, or
+# when an Album is constructed without an artist or a name.
+class MusicFileErr(Exception):
+    pass
+
+# One music file, described entirely by its path:
+#   <anything>/<album directory>/<NN> - <title>.<extension>
+class MusicFile(object):
+    # Maps each ID3v2 frame name to a Python expression, kept as a string,
+    # that set_tags() evaluates (with 'self' in scope) to get the frame value.
+    id3v2_to_attrib = { 'TIT2' : 'self.title',
+                        'TPE1' : 'self.album.artist',
+                        'TALB' : 'self.album.name',
+                        'TRCK' : 'str(self.track_number)',
+                        'TPE3' : 'self.album.conductor',
+                        #'TYER' : 'year'
+                    }
+    # The inverse mapping: expression string -> frame name.
+    attrib_to_id3v2 = reverse_hash(id3v2_to_attrib)
+
+    # filename: path of the file on disk.
+    # album: the Album this track belongs to.
+    # title: track title.
+    # track_number: anything int() accepts; stored as an int.
+    def __init__(self, filename, album, title, track_number):
+        self.filename = filename
+        self.album = album
+        self.title = title
+        self.track_number = int(track_number)
+
+    # Builds a MusicFile (and its Album) from a path.
+    # Raises MusicFileErr if the file name or its directory name cannot be
+    # parsed.
+    @staticmethod
+    def from_filename(filename):
+        parsed = music_file_name_re.match(filename)
+        if parsed is None:
+            raise MusicFileErr("can't parse music file name \"" +
+                            filename + "\"")
+        return MusicFile(filename,
+                        Album.from_dirname(parsed.group('dir_name')),
+                        parsed.group('track_name'),
+                        parsed.group('track_number'))
+
+    # Returns "<album description>/<NN> - <title>".
+    def to_s(self):
+        return "%s/%02d - %s" % (self.album.to_s(),
+                                 self.track_number, self.title)
+
+    # Removes the ID3v1 tag, then the ID3v2 tag, through the wrapper script.
+    # Exit statuses of these commands are not reported.
+    def clear_tags(self):
+        for delete_flag in ("--delete-v1", "--delete-v2"):
+            my_system(True, id3v2_wrapper, delete_flag, self.filename)
+
+    # Sets a single frame by running: id3v2 --<att> <expr> <filename>
+    def add_tag(self, att, expr):
+        my_system(False, "id3v2", "--" + att, expr, self.filename)
+
+    # Writes every frame listed in id3v2_to_attrib.
+    def set_tags(self):
+        for frame, expression in self.id3v2_to_attrib.iteritems():
+            # eval() only ever sees the constant expression strings from the
+            # class-level table above, never text taken from file names.
+            self.add_tag(frame, eval(expression))
+# CODE
+
+## Find id3v2_wrapper.sh
+# This runs before the options are parsed, so the wrapper has to exist next
+# to this script even for -h and -s; find_companion_script() exits otherwise.
+id3v2_wrapper = find_companion_script('id3v2_wrapper.sh')
+
+## Parse options
+# Prints the command-line help to stdout and exits with status 1.
+def Usage():
+    prog = os.path.basename(sys.argv[0])
+    print prog + ": the mp3 tagging program"
+    print
+    print "Usage: " + prog + " [-h][-d][-v][-s] [dirs]"
+    print "-h: this help message"
+    print "-d: dry-run mode"
+    # -v is accepted by the option parser, so list it here as well.
+    print "-v: verbose mode"
+    print "-s: self-test"
+    print "dirs: directories to search for albums."
+    print "This program skips dirs with \"[LL]\" in the name."
+    sys.exit(1)
+
+# Split the command line into option flags and the directories to process;
+# any option getopt rejects prints the help text and exits.
+# NOTE(review): the option string also accepts "-i <arg>", which nothing
+# below acts on -- confirm whether it can be dropped.
+try:
+    optlist, dirs = getopt.getopt(sys.argv[1:], ':dhi:sv')
+except getopt.GetoptError:
+    Usage()
+
+for flag, _value in optlist:
+    if flag == '-h':
+        Usage()
+    elif flag == '-d':
+        dry_run = True
+    elif flag == '-v':
+        verbose = True
+    elif flag == '-s':
+        self_test = True
+
+# The self-test replaces normal processing entirely.
+if self_test:
+    run_self_test()
+    sys.exit(0)
+
+# Walk every directory named on the command line and retag each mp3 found.
+for top_dir in dirs:
+    if (re.search(r"\[LL\]", top_dir)):
+        print "skipping \"" + top_dir + "\"..."
+        continue
+    # Assume that paths without a directory prefix are local. startswith()
+    # also copes with an empty argument, where indexing [0] would raise.
+    if (not top_dir.startswith("/")) and (not top_dir.startswith("./")):
+        top_dir = "./" + top_dir
+
+    # Validate that 'top_dir' is a directory and we can access the entries
+    # Note: this does not protect against having nested directories with
+    # bad permissions
+    try:
+        os.listdir(top_dir)
+    except OSError:
+        print "ERROR: cannot stat entries of \"" + top_dir + "\""
+        continue
+
+    # Process all files in the directory
+    if (verbose):
+        print "******** find -L " + top_dir + " -noleaf"
+    try:
+        proc = subprocess.Popen(['find', '-L', top_dir, '-noleaf'],
+                stdout=subprocess.PIPE)
+    except OSError, e:
+        print "ERROR: cannot run find: " + str(e)
+        continue
+    for line in proc.stdout:
+        # Only drop the line terminator; stripping all whitespace would
+        # change file names that begin or end with a space.
+        file_name = line.rstrip("\n")
+        if (music_file_re.match(file_name)):
+            try:
+                m = MusicFile.from_filename(file_name)
+                m.clear_tags()
+                m.set_tags()
+                if (verbose):
+                    print "SUCCESS: " + file_name
+                total_albums = total_albums + 1
+            except MusicFileErr, e:
+                print "ERROR: " + str(e)
+    # Close the pipe and reap the child so it does not linger as a zombie,
+    # and report a failed traversal instead of silently ignoring it.
+    proc.stdout.close()
+    if (proc.wait() != 0):
+        print "ERROR: find exited with status " + str(proc.returncode)
+    if (verbose):
+        print "********"
+
+if (dry_run):
+    print "(dry run)",
+print "Successfully processed " + str(total_albums) + " total mp3s"