Python eyeD3 库的乱码问题初步

续上次的，再次折腾专辑封面。请教了江阁之后，江阁给了一个 hack，现在基本可以读取 GBK 编码的 ID3 标签了，附上现在的代码：

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# 2012.12.3

'''根据豆瓣自动补全专辑封面'''

import os, sys, re, time, urllib, eyeD3

# Python 2 only: sys.setdefaultencoding is removed from the sys namespace at
# interpreter start-up, so the module is reloaded to get it back. Setting it
# to UTF-8 changes the codec used for implicit str <-> unicode conversions
# everywhere in the process.
reload(sys)
sys.setdefaultencoding('utf-8')

import logging
# Send all records (DEBUG and above) to log.txt in the current directory.
# filemode 'w' truncates the log at the start of every run.
logging.basicConfig(filename = 'log.txt', 
                    filemode = 'w', 
                    format = '[%(levelname)s] [%(asctime)s] %(message)s', 
                    level = logging.DEBUG)

def resolve_abnormal(abnormal):
    """Turn tag text into the GBK byte string the rest of the script expects.

    Two kinds of input are handled:

    * Text whose code points are all below 256. This is presumably GBK data
      that the tag reader decoded one byte per character; each character is
      mapped back to the byte with the same value (``chr(ord(c))``), which
      under Python 2 yields the original byte string.
    * Text containing at least one code point above 255, i.e. text that was
      already decoded correctly. ``chr`` would raise ValueError for it under
      Python 2, so it is encoded to GBK instead, matching the
      ``decode('gbk')`` applied to the search keyword in fillCover.
      Characters that GBK cannot represent are dropped.

    Args:
        abnormal: text read from an ID3 frame (may be empty).

    Returns:
        The repaired string; an empty input yields an empty string.
    """
    if any(ord(ch) > 0xFF for ch in abnormal):
        return abnormal.encode('gbk', 'ignore')
    return "".join(chr(ord(ch)) for ch in abnormal)

def _readURL(url):
    """Return the body served at url, closing the connection even on error."""
    response = urllib.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


def fillCover(filename):
    """Look up a cover image on Douban and embed it into a file's ID3 tag.

    The title, artist and album are read from the tag, passed through
    resolve_abnormal() and joined into a Douban music search query. When all
    three are empty, the file's base name (the text before its first dot) is
    used as the query instead. The first cover id found in the search
    response is downloaded to ``cover/<artist> - <album>.jpg`` and attached
    to the tag with picture type 3 (front cover in ID3v2 APIC), replacing
    any images the tag already had.

    Args:
        filename: path of the audio file to process.

    Returns:
        True when a cover was downloaded and written into the tag; False on
        any failure (file cannot be opened, search request failed or was
        empty, no cover id in the response, download error, tag write
        error). Every failure is logged.
    """
    logging.info('processing %s' % filename)
    t = eyeD3.Tag()

    try:
        t.link(filename)
    except Exception:
        # Exception rather than a bare except, so Ctrl-C still aborts a run.
        logging.error('can not open file')
        return False

    # Existing images are always dropped so the downloaded cover replaces
    # them; the "skip files that already have a cover" check is disabled.
    t.removeImages()

    title = resolve_abnormal(t.getTitle())
    logging.debug('title is %s' % title)
    album = resolve_abnormal(t.getAlbum())
    logging.debug('album is %s' % album)
    artist = resolve_abnormal(t.getArtist())
    logging.debug('artist is %s' % artist)

    if title or artist or album:
        keyword = ' '.join([title, artist, album])
    else:
        logging.debug('no enough id3 tags')
        # Fall back to the file's base name; both '\' and '/' are accepted
        # as path separators, and everything from the first dot on is cut.
        keyword = re.split(r'[\\/]', filename)[-1].split('.')[0]
    logging.debug('keyword is %s' % keyword)

    # API reference: http://developers.douban.com/wiki/?title=api_v2
    # (a 6-second pause between requests used to sit here and is disabled).
    doubanSearchAPI = 'http://api.douban.com/v2/music/search?q={0}'
    try:
        # The keyword is treated as GBK bytes and re-encoded as UTF-8
        # before being percent-quoted into the query string.
        request = doubanSearchAPI.format(
            urllib.quote(keyword.decode('gbk').encode('utf-8')))
        logging.debug('request is %s' % request)
        result = _readURL(request)
    except (UnicodeError, IOError):
        logging.error('search request error')
        return False

    if not result:
        return False

    # Cover files are named s<digits>.jpg; the digits identify the image.
    doubanCoverPattern = r's(\d+)\.jpg'
    doubanCoverURL = 'http://img3.douban.com/lpic/s{0}.jpg'
    match = re.search(doubanCoverPattern, result, re.IGNORECASE)
    if not match:
        logging.debug('no cover image url matched')
        return False
    coverFileURL = doubanCoverURL.format(match.group(1))
    logging.debug('cover image url is %s' % coverFileURL)

    coverDir = 'cover'
    coverFileName = os.path.join(coverDir, ' - '.join([artist, album]) + '.jpg')
    try:
        logging.debug('downloading cover image file')
        if not os.path.isdir(coverDir):
            os.makedirs(coverDir)
        # Download before opening the target so a failed request does not
        # leave an empty .jpg behind.
        data = _readURL(coverFileURL)
        with open(coverFileName, 'wb') as f:
            f.write(data)
        logging.debug('download finished')
    except (IOError, OSError):
        logging.error('download error')
        # Without a downloaded file there is nothing to embed.
        return False

    try:
        logging.debug('adding image')
        t.addImage(3, coverFileName, u'')
        t.update()
    except Exception:
        logging.error('add image error')
        return False

    logging.info('successfully add image')
    return True


def main():
    """Run fillCover on every entry of the folder given on the command line.

    The folder is taken from the first command-line argument. When it is
    missing, a usage line is printed and False is returned; otherwise each
    directory entry is handed to fillCover and nothing is returned.
    """
    arguments = sys.argv
    if len(arguments) >= 2:
        folder = arguments[1]
        for entry in os.listdir(folder):
            fillCover(os.path.join(folder, entry))
        return

    # Parenthesised form prints the same text under the Python 2 print statement.
    print('usage: %s /path/to/your/music/folder/' % __file__)
    return False


# Only run when executed as a script, not when imported.
if __name__ == '__main__':
    main()

但是进一步的探索发现了一个神奇的问题：有些歌曲是拖不进 iTunes 的，而且这种歌曲的 ID3 必须转成 APEv2，才能正确地显示标签中的歌手和专辑名称，专辑封面也才能出现。我发现这些歌曲一般来自 Google Music，不知道是不是用了什么数字水印技术。

ZRJ

学习笔记

Python eyeD3 库的乱码问题初步

Leave a Reply Cancel reply