接着上次的话题，我又折腾了一番专辑封面。请教江阁之后，他给了一个 hack，现在基本可以读取 GBK 编码的 ID3 标签了。附上目前的代码：
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# 2012.12.3
"""Fill in album covers for the audio files of a folder using Douban.

Python 2 script built on the legacy ``eyeD3`` ``Tag`` interface.  For every
file in the folder given on the command line it reads title/artist/album
from the ID3 tag, searches the Douban music API with them, downloads the
first cover image referenced in the response and embeds it into the tag.

Side effects: writes ``log.txt`` and a ``cover`` directory (downloaded
images) relative to the current working directory.

Usage: python <this script> /path/to/your/music/folder/
"""
import contextlib
import logging
import os
import re
import sys
import time
import urllib

import eyeD3

# HACK: Python 2 only. site.py removes sys.setdefaultencoding at start-up, so
# sys has to be reloaded to get it back.  Switching the default codec to UTF-8
# makes implicit str <-> unicode conversions use UTF-8 instead of ASCII.
reload(sys)
sys.setdefaultencoding('utf-8')

logging.basicConfig(filename='log.txt',
                    filemode='w',
                    format='[%(levelname)s] [%(asctime)s] %(message)s',
                    level=logging.DEBUG)

_DOUBAN_SEARCH_API = 'http://api.douban.com/v2/music/search?q={0}'
_DOUBAN_COVER_URL = 'http://img3.douban.com/lpic/s{0}.jpg'
# Cover images are referenced in the search response as "s<digits>.jpg".
_DOUBAN_COVER_PATTERN = re.compile(r's(\d+)\.jpg', re.IGNORECASE)
_COVER_DIR = 'cover'


def resolve_abnormal(abnormal):
    """Return the raw byte string hidden inside a mis-decoded tag value.

    Presumably eyeD3 hands back GBK-encoded tag text as a unicode string
    whose code points are really the original bytes; mapping every character
    through ``ord``/``chr`` recovers that byte string.

    :param abnormal: tag text as returned by eyeD3 (unicode or str); may be
        empty or None.
    :return: a byte string, GBK-encoded when the hack applies.
    """
    if not abnormal:
        return ''
    try:
        return ''.join(map(chr, map(ord, abnormal)))
    except ValueError:
        # A code point above 255 means the text was decoded correctly in the
        # first place.  Encode it to GBK so callers always get GBK bytes.
        return abnormal.encode('gbk', 'replace')


def _build_keyword(filename, title, artist, album):
    """Build the search keyword from the tag fields, or from the file name."""
    parts = [part for part in (title, artist, album) if part]
    if parts:
        return ' '.join(parts)
    logging.debug('no enough id3 tags')
    # Fall back to the bare file name without directory and extension.
    return os.path.splitext(os.path.basename(filename))[0]


def _read_url(url):
    """Fetch *url* and return the response body, always closing the socket."""
    with contextlib.closing(urllib.urlopen(url)) as response:
        return response.read()


def _find_cover_url(keyword):
    """Search Douban for *keyword*; return a cover image URL or None."""
    # Keywords are GBK bytes (see resolve_abnormal); the API wants UTF-8.
    query = urllib.quote(keyword.decode('gbk', 'replace').encode('utf-8'))
    request = _DOUBAN_SEARCH_API.format(query)
    logging.debug('request is %s', request)
    try:
        result = _read_url(request)
    except IOError:
        logging.exception('search request error')
        return None
    # NOTE: the original carried a disabled 6-second pause at this point, next
    # to a link to http://developers.douban.com/wiki/?title=api_v2 --
    # presumably a rate-limit guard.  Add time.sleep(6) here if the API starts
    # rejecting requests.
    if not result:
        return None
    match = _DOUBAN_COVER_PATTERN.search(result)
    if match is None:
        logging.debug('no cover image url matched')
        return None
    cover_url = _DOUBAN_COVER_URL.format(match.group(1))
    logging.debug('cover image url is %s', cover_url)
    return cover_url


def _download_cover(cover_url, cover_path):
    """Download *cover_url* into *cover_path*, creating the cover directory."""
    if not os.path.isdir(_COVER_DIR):
        os.makedirs(_COVER_DIR)
    data = _read_url(cover_url)
    with open(cover_path, 'wb') as cover_file:
        cover_file.write(data)


def fillCover(filename):
    """Find a cover for *filename* on Douban and embed it into its ID3 tag.

    Existing images in the tag are replaced, but only once a new cover has
    been downloaded successfully.

    :param filename: path of the audio file to process.
    :return: True when the cover was embedded and the tag saved, False on any
        failure (unreadable file, failed search, no match, failed download or
        failed tag update).  Failures are logged, never raised, so that one
        bad file does not stop a batch run.
    """
    logging.info('processing %s', filename)
    tag = eyeD3.Tag()
    try:
        tag.link(filename)
        title = resolve_abnormal(tag.getTitle())
        album = resolve_abnormal(tag.getAlbum())
        artist = resolve_abnormal(tag.getArtist())
    except Exception:
        logging.exception('can not open file')
        return False
    logging.debug('title is %s', title)
    logging.debug('album is %s', album)
    logging.debug('artist is %s', artist)

    keyword = _build_keyword(filename, title, artist, album)
    logging.debug('keyword is %s', keyword)

    cover_url = _find_cover_url(keyword)
    if cover_url is None:
        return False

    cover_path = os.path.join(_COVER_DIR, ' - '.join([artist, album]) + '.jpg')
    try:
        logging.debug('downloading cover image file')
        _download_cover(cover_url, cover_path)
        logging.debug('download finished')
    except Exception:
        logging.exception('download error')
        # Without a downloaded file there is nothing to embed.
        return False

    try:
        logging.debug('adding image')
        tag.removeImages()
        # 3 is the ID3v2 picture type for "front cover".
        tag.addImage(3, cover_path, u'')
        tag.update()
    except Exception:
        logging.exception('add image error')
        return False
    logging.info('successfully add image')
    return True


def main():
    """Run fillCover on every regular file of the folder given in argv[1]."""
    if len(sys.argv) < 2:
        print('usage: %s /path/to/your/music/folder/' % __file__)
        return False
    folder = sys.argv[1]
    for name in os.listdir(folder):
        path = os.path.join(folder, name)
        # Sub-directories cannot carry a tag; skip them.
        if os.path.isfile(path):
            fillCover(path)


if __name__ == '__main__':
    main()
但是进一步探索后发现了一个神奇的问题：有些歌曲拖不进 iTunes，而且这类歌曲的 ID3 标签必须转成 APEv2，才能正确地显示标签中的歌手和专辑名称，并显示专辑封面。我发现这些歌曲一般来自 Google Music，不知道是不是用了什么数字水印技术。