"""Look up every song listed in ``merged.xlsx`` on iTunes and print the best match.

Reads the first sheet of ``merged.xlsx`` from the current working directory,
and for each row calls ``search_on_itunes_v2`` with the row's artist, release,
song title, release date and track length.
"""
import datetime
import os.path
import pprint
import time

import pandas

# Display configuration: show frames in full when they are printed.
pandas.options.display.max_rows = None
pandas.options.display.max_columns = None
pandas.options.display.width = 6000
pandas.options.display.max_colwidth = 6000
pandas.options.display.colheader_justify = 'left'


def _length_to_seconds(length):
    """Convert a ``minutes:seconds`` value to a whole number of seconds.

    The value is stringified first, and only the first two ``:``-separated
    fields are read, so a value rendered as ``'04:09:00'`` yields 249 just
    like ``'4:09'`` does.

    Raises:
        ValueError: if either of the first two fields is not an integer.
        IndexError: if the value contains no ``:`` separator.
    """
    fields = str(length).split(':')
    return int(fields[0]) * 60 + int(fields[1])


def _parse_release_date(release_date):
    """Return *release_date* as a ``datetime.date``.

    Accepts a ``'YYYY/MM/DD'`` string. Values that are already date or
    datetime objects (``pandas.Timestamp`` is a ``datetime`` subclass) are
    accepted too, because a spreadsheet cell formatted as a date is read back
    as an object rather than as text.

    Raises:
        ValueError: if a string does not match ``'%Y/%m/%d'``.
    """
    # datetime must be tested before date: datetime is a subclass of date.
    if isinstance(release_date, datetime.datetime):
        return release_date.date()
    if isinstance(release_date, datetime.date):
        return release_date
    return datetime.datetime.strptime(release_date, '%Y/%m/%d').date()


def main():
    """Search iTunes for every row of ``merged.xlsx`` and pretty-print each result."""
    # Imported here rather than at module top so that importing this module
    # does nothing beyond the pandas display configuration above.
    from search_on_itunes import search_on_itunes_v2

    df = pandas.read_excel(os.path.join(os.getcwd(), 'merged.xlsx'), sheet_name='Sheet1', index_col=0)

    for _, row in df.iterrows():
        # The sheet must have exactly these eleven columns, in this order.
        (url, artist_name, release_name, release_type, release_date, record_label,
         song_name, length, lyrics, composition, arrangement) = row

        # Parse once and reuse for both the echo below and the search call.
        length_seconds = _length_to_seconds(length)
        released = _parse_release_date(release_date)

        print(artist_name)
        print(release_name)
        print(song_name)
        print(length_seconds)
        print(released)
        print('\n\n\n')
        response = search_on_itunes_v2(artist_name=artist_name, album_name=release_name, song_name=song_name,
                                       released_date=released,
                                       length=length_seconds,
                                       debug=False)
        pprint.pprint(response)


if __name__ == '__main__':
    main()
def _itunes_release_date(content):
    """Return the ``releaseDate`` field of an iTunes result as a ``datetime.date``."""
    # fromisoformat() does not accept a trailing 'Z' on older Pythons, so it
    # is rewritten as an explicit UTC offset first.
    return datetime.datetime.fromisoformat(
        str(content['releaseDate']).replace('Z', '+00:00')).date()


def _track_seconds(content):
    """Return the ``trackTimeMillis`` field of an iTunes result in whole seconds."""
    return int(content['trackTimeMillis'] / 1000)


def _score_candidate(content, album_name, artist_name, length, released_date):
    """Score how well one iTunes result matches the requested track.

    Returns a 4-tuple ``(album, date, artist, length)``; each component lies
    in ``[0, 1]`` and equals 1.0 for an exact match:

    * album / artist: ``difflib.SequenceMatcher`` ratio of the names,
    * date: ``1 / (days apart + 1)``,
    * length: ``1 / (seconds apart + 1)``.
    """
    # Drop the ' - Single' / ' - EP' markers (including the leading space, so
    # no stray trailing blank is left to lower the ratio).
    collection_name = str(content['collectionName']).replace(' - Single', '').replace(' - EP', '')
    album_score = difflib.SequenceMatcher(None, collection_name, album_name).ratio()
    date_score = 1 / (abs((_itunes_release_date(content) - released_date).days) + 1)
    artist_score = difflib.SequenceMatcher(None, content['artistName'], artist_name).ratio()
    length_score = 1 / (abs(_track_seconds(content) - length) + 1)
    return album_score, date_score, artist_score, length_score


def search_on_itunes_v2(song_name='', album_name='', artist_name='', length=0, released_date=None,
                        debug=False):
    """Search the Japanese iTunes store for a song and return the best-matching result.

    Up to ten results for *song_name* are fetched and each is scored against
    *album_name*, *artist_name*, *length* and *released_date* (see
    ``_score_candidate``); the highest-scoring result wins.

    Args:
        song_name: Song title used as the search term.
        album_name: Name of the release the song appears on.
        artist_name: Artist name.
        length: Expected track length in seconds.
        released_date: Expected release date (``datetime.date``). ``None``
            means today, evaluated at call time.
        debug: When exactly ``False``, everything printed after argument
            validation is discarded; any other value leaves output visible.

    Returns:
        ``[]`` if the search returned no results, otherwise a two-item list
        ``[content, score]`` where ``content`` is the iTunes result dict and
        ``score`` is the sum of the four match components.

    Raises:
        KeyError: if both *song_name* and *album_name* are empty.
    """
    # Function-scope imports: the module's import header is not fully visible
    # from here, so these new dependencies are brought in locally.
    import contextlib
    import urllib.parse

    if released_date is None:
        released_date = datetime.date.today()

    if song_name != '':
        print('曲名: ' + song_name)
    if album_name != '':
        print('収録アルバム名: ' + album_name)
    if artist_name != '':
        print('アーティスト名: ' + artist_name)
    if song_name == '' and album_name == '':
        raise KeyError('song_name or album_name is required')

    with contextlib.ExitStack() as stack:
        if debug is False:
            # Silence the diagnostic prints below. The ExitStack closes the
            # devnull handle and restores the previous stdout on every exit
            # path, including the early return and any exception.
            sink = stack.enter_context(open(os.devnull, 'w', encoding='UTF-8'))
            stack.enter_context(contextlib.redirect_stdout(sink))

        song_name = unicodedata.normalize('NFKC', song_name)
        album_name = unicodedata.normalize('NFKC', album_name)

        # Pad the term with a space on each side that begins/ends with an
        # ASCII character or digit.
        # NOTE(review): presumably this nudges the search towards whole-word
        # matches — confirm against the iTunes Search API behaviour.
        if song_name[-1:].isascii() or song_name[-1:].isdigit():
            song_name += ' '
        if song_name[0].isascii() or song_name[0].isdigit():
            song_name = ' ' + song_name
        song_name = mojimoji.zen_to_han(song_name, ascii=False, kana=False)

        # Percent-encode the term so characters such as '&', '#' or '+' in a
        # title cannot terminate or alter the query string.
        result_json = json.loads(safe_request_get_as_text(
            "https://itunes.apple.com/search?term=" + urllib.parse.quote(song_name, safe='')
            + "&media=music&entity=song&attribute=songTerm&country=jp&lang=ja_jp&limit=10&GenreTerm=J-Pop&sort=recent"))

        if not result_json['results']:
            return []

        sort_list = []
        for content in result_json['results']:
            pprint.pprint(content)
            print('収録アルバム: ' + content['collectionName'])
            print(_itunes_release_date(content))
            print('アーティスト名: ' + content['artistName'])
            print(content['artistViewUrl'])
            print(str(content['artworkUrl100']).replace('100x100', '5000x5000'))
            print(content['collectionViewUrl'])
            print('曲名: ' + content['trackName'])
            print('長さ: ' + str(_track_seconds(content)) + '秒')

            album_score, date_score, artist_score, length_score = _score_candidate(
                content, album_name, artist_name, length, released_date)
            total_score = album_score + date_score + artist_score + length_score

            print()
            print('album name diff inverted:', end='')
            print(album_score)
            print('released day diff: ', end='')
            print(date_score)
            print('artist name diff: ', end='')
            print(artist_score)
            print('length diff: ', end='')
            print(length_score)
            print(total_score)

            sort_list.append([content, total_score])
            print("\n\n\n")

        # Stable ascending sort: on a tie the later result is the one returned.
        best_match = sorted(sort_list, key=operator.itemgetter(1))[-1]
        pprint.pprint(best_match)
        return best_match


if __name__ == '__main__':
    # Manual smoke test; guarded so that importing this module performs no
    # network requests.
    print(search_on_itunes_v2(song_name='おねがいネイル', album_name='モーニング刑事。', length=249,
                              released_date=datetime.datetime.strptime('1998/09/30', '%Y/%m/%d').date(),
                              artist_name='モーニング娘。&平家みちよ',
                              debug=True))

    print(search_on_itunes_v2(song_name='My Days for You', album_name='真野恵里菜', length=261,
                              released_date=datetime.datetime.strptime('2011/06/29', '%Y/%m/%d').date(),
                              artist_name='真野恵里菜',
                              debug=True))