接着上次的话题,再次折腾专辑封面。请教了江阁之后,江阁给了一个 hack,现在基本可以读取 GBK 编码的 ID3 标签了,附上现在的代码:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# 2012.12.3
'''Automatically fill in album cover art using Douban.'''
import os, sys, re, time, urllib, eyeD3
# Python 2 only: sys.setdefaultencoding is removed from the sys module during
# interpreter start-up, so the module is reloaded to get it back. The default
# codec used for implicit str <-> unicode conversion is then set to UTF-8.
# The two statements must stay in this order.
reload(sys)
sys.setdefaultencoding('utf-8')
import logging
# Send every record (DEBUG and above) to log.txt in the working directory.
# filemode 'w' truncates the file, so each run starts with an empty log.
logging.basicConfig(
    level=logging.DEBUG,
    format='[%(levelname)s] [%(asctime)s] %(message)s',
    filename='log.txt',
    filemode='w',
)
def resolve_abnormal(abnormal):
    """Rebuild a string from the code point of every character in ``abnormal``.

    Each character goes through ``ord`` and back through ``chr``, and the
    pieces are concatenated.  Under Python 2 this turns a unicode string whose
    code points are all below 256 into the byte string with the same values.

    NOTE(review): presumably this recovers the raw GBK bytes of ID3 text that
    the tag reader decoded one byte per character -- confirm against eyeD3.
    Under Python 2, ``chr`` rejects values above 255, so text containing such
    characters raises ValueError here.
    """
    return "".join(chr(ord(ch)) for ch in abnormal)
_DOUBAN_SEARCH_API = 'http://api.douban.com/v2/music/search?q={0}'
_DOUBAN_COVER_URL = 'http://img3.douban.com/lpic/s{0}.jpg'
# Cover images are referenced as s<digits>.jpg; the digits are captured so the
# large-picture URL can be rebuilt from them.  The dot is escaped on purpose.
_DOUBAN_COVER_PATTERN = re.compile(r's(\d+)\.jpg', re.IGNORECASE)
_COVER_DIR = 'cover'


def _read_tag_text(tag):
    """Return (title, album, artist) of tag, each run through resolve_abnormal."""
    title = resolve_abnormal(tag.getTitle() or '')
    logging.debug('title is %s', title)
    album = resolve_abnormal(tag.getAlbum() or '')
    logging.debug('album is %s', album)
    artist = resolve_abnormal(tag.getArtist() or '')
    logging.debug('artist is %s', artist)
    return title, album, artist


def _fetch(url):
    """Return the body served at url; raise IOError on a non-200 HTTP status."""
    response = urllib.urlopen(url)
    try:
        status = response.getcode()
        # getcode() is None for non-HTTP URLs; only reject real HTTP failures,
        # otherwise an error page would be stored as if it were the payload.
        if status is not None and status != 200:
            raise IOError('unexpected HTTP status %s for %s' % (status, url))
        return response.read()
    finally:
        response.close()


def _find_cover_url(result):
    """Return the large-cover URL for the first cover named in result, or None."""
    match = _DOUBAN_COVER_PATTERN.search(result)
    if match is None:
        return None
    return _DOUBAN_COVER_URL.format(match.group(1))


def _save(url, path):
    """Download url and write it to path, creating the parent directory."""
    # Fetch before touching the disk so a failed download leaves no empty file.
    data = _fetch(url)
    directory = os.path.dirname(path)
    if directory and not os.path.isdir(directory):
        os.makedirs(directory)
    with open(path, 'wb') as out:
        out.write(data)


def fillCover(filename):
    """Look up a cover for one audio file on Douban and embed it in its tag.

    The search keyword is built from the file's title, artist and album tags;
    when all three are empty the file name without its extension is used.  The
    first cover image id found in the search response is downloaded into the
    ``cover`` directory and stored in the tag as the front cover, replacing
    any images already present.

    Args:
        filename: path of the audio file to process.

    Returns:
        True when the image was added and the tag saved; False when the file
        could not be opened, the search or download failed, no cover was
        found, or the tag could not be updated.  Failures are logged.
    """
    logging.info('processing %s', filename)
    t = eyeD3.Tag()
    try:
        t.link(filename)
    except Exception:
        logging.exception('can not open file')
        return False

    # Files that already carry images are processed too: the check that used
    # to skip them is disabled, and their images are replaced further down.
    title, album, artist = _read_tag_text(t)
    if title or artist or album:
        keyword = ' '.join([title, artist, album])
    else:
        logging.debug('no enough id3 tags')
        keyword = os.path.splitext(os.path.basename(filename))[0]
    logging.debug('keyword is %s', keyword)

    try:
        # The keyword is treated as GBK bytes and sent to the API as UTF-8.
        request = _DOUBAN_SEARCH_API.format(
            urllib.quote(keyword.decode('gbk').encode('utf-8')))
    except UnicodeError:
        logging.exception('keyword is not valid gbk')
        return False
    logging.debug('request is %s', request)

    # API reference: http://developers.douban.com/wiki/?title=api_v2
    # A 6 second pause after each request used to be here; it is disabled.
    try:
        result = _fetch(request)
    except Exception:
        logging.exception('search request failed')
        return False
    if not result:
        return False

    coverFileURL = _find_cover_url(result)
    if coverFileURL is None:
        logging.debug('no cover image url matched')
        return False
    logging.debug('cover image url is %s', coverFileURL)

    coverFileName = os.path.join(
        _COVER_DIR, ' - '.join([artist, album]) + '.jpg')
    try:
        logging.debug('downloading cover image file')
        _save(coverFileURL, coverFileName)
        logging.debug('download finished')
    except Exception:
        # Without a downloaded file there is nothing to embed.
        logging.exception('download error')
        return False

    try:
        logging.debug('adding image')
        # Old images are dropped only now, once a replacement is on disk.
        t.removeImages()
        # 3 is the ID3v2 picture type for the front cover.
        t.addImage(3, coverFileName, u'')
        t.update()
    except Exception:
        logging.exception('add image error')
        return False
    logging.info('successfully add image')
    return True
def main():
    """Run fillCover on every entry of the folder named on the command line.

    When no folder argument is given, a usage line is printed and False is
    returned.  Otherwise each name from os.listdir is joined to the folder
    and passed to fillCover, and the function returns None.
    """
    arguments = sys.argv[1:]
    if not arguments:
        print('usage: %s /path/to/your/music/folder/' % __file__)
        return False
    folder = arguments[0]
    for entry in os.listdir(folder):
        fillCover(os.path.join(folder, entry))
# Start the batch only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
但是进一步探索时发现了一个神奇的问题:有些歌曲是拖不进 iTunes 的,而且这种歌曲的 ID3 必须转成 APEv2,才能正确地显示标签里的歌手和专辑名称,专辑封面也才能显示出来。我发现这些歌曲一般来自 Google Music,不知道是不是用了什么数字水印技术。