# copycat.py

import argparse
import json
import os
import shutil
import sys
import threading
import time
import urllib
import urllib.request

import eyed3
import requests
import spotipy
import youtube_dl
from bs4 import BeautifulSoup
from spotipy.oauth2 import SpotifyClientCredentials
from youtubesearchpython import VideosSearch

# Global settings for the whole script.
#
# The Spotify credentials can be supplied through the SPOTIPY_CLIENT_ID and
# SPOTIPY_CLIENT_SECRET environment variables (the names spotipy itself uses);
# the literals below are only fallbacks so existing setups keep working.
# NOTE(review): secrets committed to source control should be rotated.
configs = {
    'threads': 1,  # maximum number of tracks processed (search/download/convert) at the same time
    'concurrent_connections': 2,  # upper bound used when fetching playlist data from Spotify
    'download_dir': './downloads/',  # Downloaded songs go here (must already exist, keep the trailing slash).
    'sync_download_dir': [  # Mirror the download dir into each of these directories (trailing slash required)
        'G:/MUSIC/spotify/',
    ],
    'song_selection': {
        'use_filtering': False,  # when False every search result is kept and only duration decides
        # search results whose title/channel contain one of these words are ignored,
        # unless the word is also part of the artist/track name
        'edge_cases': ['remix', 'live', 'instrumental', 'cover', 'how to', 'tutorial', 'concert',
                       'reimagined', 'bass boost', 'boosted', 'explained', 'slowed', 'karaoke',
                       'datamosh', 'show', '3d', 'dance', 'unplugged', 'behind', 'festival',
                       'chipmunks', 'preview', 'mashup', 'feat', 'bass', 'acoustic', 'session',
                       ' vs ', 'sings', 'grammy', 'parody', 'decoded', 'lyrics',
                       'performance', '8d', 'bass boosted', 'clean'],
        # when artist + title have 4 or more words, at least this percentage of them
        # must appear in a search result for it to count as related
        'min_percent_threshold': 60,
        # only the first N filtered search results are compared by duration
        'diff_track_seconds_limit': 5,
        'append_search_term': '',  # extra text appended to every YouTube search
    },
    'youtube_username': None,  # Cant download ? try this
    'youtube_password': None,  # 🙈
    'tag_mp3': True,  # write ID3 tags and album art into the converted mp3
    'spotify': {  # Spotify Web API application credentials
        'client_id': os.environ.get('SPOTIPY_CLIENT_ID', 'ea59966691f546a38c800e765338cf31'),
        'client_secret': os.environ.get('SPOTIPY_CLIENT_SECRET', 'a99b13c2324340939cca0a6d71f91bc3'),
    },
    'playlist': {
        'spotify_parsed': [],  # for internal use, filled by parse_spotify_playlist_config()
        # Each entry is either a playlist reference (string) or a list of track URLs.
        'spotify': [
            'https://open.spotify.com/playlist/4ciFNzOZXjdW4SYUO9d2vV?si=1fd8b11fce14479b',
            # Example of a plain list of tracks:
            # [
            #     'https://open.spotify.com/track/4yrloqLBKS7Pi3z7g2HDga?si=SFhC5k1qTLuf1ByrBlKWCA',
            #     'https://open.spotify.com/track/5EzGOkUwkRUXYAyvjlEHah?si=iJF3_Y-zQkKAhOLkhU6ScQ',
            #     'https://open.spotify.com/track/5rfNNdaCovygh3cxY1txTR?si=oIEDqPtERny1y0uAekcpVA',
            #     'https://open.spotify.com/track/4qRHWM6lESs5vqNmTJrhum?si=86y-wkA8QlyMi_dz1KMIrw',
            #     'https://open.spotify.com/track/56nziqLKNZ3METexiH6zdF?si=Dwak3g3HQNm1twoPkS-VGw',
            #     'https://open.spotify.com/track/0mXu9RFixtjgppxSvcYcYI?si=gO2c-7gpQNm3Ny_AYfnhoQ',
            #     'https://open.spotify.com/track/5j3iBuHq6vv7VcBo4Y2QrK?si=L1-hZJsMQBe0c2MN3MSOIA',
            #     'https://open.spotify.com/track/40mphbjHvSisMvIAmSfpBX?si=toJPcffsSLmfG5llqbIxgw',
            #     'https://open.spotify.com/track/5MjMvRHjOOIV6tA7OmC7mj?si=o7NtShzHRCWYoUMhMqNP4A',
            # ],
        ]
    }
}

# Command-line interface: every option is a plain on/off switch.
parser = argparse.ArgumentParser(description="🎷 Sync your Spotify music with your MP3 player!")
for _flag, _help in (
    ("-s", "process playlist, download, and sync with target drive"),
    ("-ds", "sync downloaded files with your target drive only"),
    # ("-r", "loop the process after 2 hrs"),  # disabled
    ("-v", "get more output?"),
    ("-d", "Developer use only, for debug"),
):
    parser.add_argument(_flag, help=_help, action='store_true')
del _flag, _help
args = parser.parse_args()

# Shared Spotify Web API client, authenticated with the app credentials from configs.
client_credentials_manager = SpotifyClientCredentials(
    client_id=configs['spotify']['client_id'],
    client_secret=configs['spotify']['client_secret'],
)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)


def getOS():
    """
    Tell whether we run on Windows or on anything else.
    :return: 'win' when os.name is 'nt', otherwise 'linux'
    """
    return 'win' if os.name == 'nt' else 'linux'


def p(print_string):
    """
    Write one line of output to stdout; the single place all messages go through.
    :param print_string: the value to print
    :return: None
    """
    print(print_string)


def search_youtube(text_to_search):
    """
    Search the text on youtube and return its parsed results.

    Only the first 2 results are requested. A failing search is reported and
    yields an empty list so the caller can treat it as "nothing found".
    Results without a duration (presumably live streams; TODO confirm) are
    skipped because their length cannot be compared with the Spotify track.

    :param text_to_search: the search query
    :return: list of dicts with the keys title, channel, description, href,
             video_id, duration and duration_seconds
    """
    try:
        results = VideosSearch(text_to_search, limit=2).result()['result']
    except Exception as e:  # network/parsing problems inside the search library
        p('😥 Youtube gave up, this is so sad, can we get 1 like ' + repr(e))
        return []

    video_list = []
    for item in results:
        duration = item.get('duration')
        if not duration:
            # give_me_seconds() needs a "mm:ss" style string
            continue

        snippet = item.get('descriptionSnippet')
        desc = snippet[0]['text'] if snippet else ''

        video_list.append({
            'title': item['title'],
            'channel': item['channel']['name'],
            'description': desc,
            'href': item['link'],
            'video_id': item['id'],
            'duration': duration,
            'duration_seconds': give_me_seconds(duration)
        })

    return video_list


def give_me_seconds(time):
    """
    Convert a colon separated duration ("ss", "mm:ss", "hh:mm:ss") to seconds.
    :param time: the duration string
    :return: total number of seconds as int
    """
    total = 0
    # walk from the right: seconds, then minutes (x60), then hours (x60x60) ...
    for power, part in enumerate(reversed(time.split(':'))):
        total += int(part) * 60 ** power
    return total


def download_video(video_id, file_name):
    """
    Fetch the audio of a youtube video (format 251, else best) into ./<file_name>.webm

    :param video_id: the youtube video id
    :param file_name: base name of the temporary file, without the .webm suffix
    :return: path of the downloaded file
    """
    output_path = './' + file_name + '.webm'
    options = {
        'format': '251/best',
        'outtmpl': output_path,
    }
    # credentials are optional, pass on only the ones that are configured
    for option_key, config_key in (('username', 'youtube_username'), ('password', 'youtube_password')):
        value = configs[config_key]
        if value is not None:
            options[option_key] = value

    downloader = youtube_dl.YoutubeDL(options)
    url = 'https://www.youtube.com/watch?v=' + video_id
    p(url)
    downloader.download([url])
    return output_path


def convert_to_mp3(source, target):
    """
    Convert the downloaded webm file to mp3 (160k, 44100 Hz) with ffmpeg.

    On Windows the bundled .\\ffmpeg\\bin\\ffmpeg.exe is used and the paths are
    converted to backslashes; elsewhere `ffmpeg` is looked up on PATH and the
    paths are passed through untouched. The command is given as an argument
    list, so file names need no shell quoting.

    :param source: path of the input file
    :param target: path of the mp3 to write (overwritten when it exists)
    :return: None; a failing or missing ffmpeg is reported but does not raise
    """
    import subprocess  # not imported at file level

    if getOS() == 'win':
        ffmpeg = '.\\ffmpeg\\bin\\ffmpeg.exe'
        source = source.replace('/', '\\')
        target = target.replace('/', '\\')
    else:
        ffmpeg = 'ffmpeg'

    command = [ffmpeg, '-hide_banner', '-i', source, '-vn', '-ab', '160k', '-ar', '44100', '-y', target]
    try:
        subprocess.run(command, check=False)
    except OSError as e:
        # e.g. ffmpeg is not installed; keep the old best-effort behaviour
        p('Could not run ffmpeg: ' + repr(e))


def tag_mp3(file_path, track):
    """
    tag that mp3, insert artist, album, track names and album art.

    :param file_path: path of the mp3 file to tag
    :param track: track dict; reads album_art, search_term, selected_result,
                  artist, album, name and number
    :return: None
    :raises ValueError: when eyed3 cannot load the file
    """
    audio = eyed3.load(file_path)
    if audio is None:
        raise ValueError('Not a readable mp3 file: ' + file_path)
    if audio.tag is None:
        audio.initTag()

    if track['album_art'] is not None:
        # Album art is optional: a slow or failing image server must not block
        # the worker forever or put an error page into the tag.
        try:
            response = requests.get(track['album_art'], timeout=30)
            response.raise_for_status()
        except requests.RequestException as e:
            p('Could not download album art: ' + repr(e))
        else:
            audio.tag.images.set(3, response.content, 'image/jpeg')

    # remember which search and which video produced this file
    audio.tag.comments.set(track['search_term'] + ' = ' + track['selected_result'])
    audio.tag.artist = track['artist']
    audio.tag.album = track['album']
    audio.tag.album_artist = track['artist']
    audio.tag.title = track['name']
    audio.tag.track_num = track['number']
    audio.tag.save(None, (2, 3, 0))


def clean_string(filename):
    """
    Reduce a string to ascii letters, digits, spaces and dashes,
    lower-cased and without surrounding whitespace.
    :param filename: any text
    :return: the cleaned text
    """
    allowed = set('abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-')
    kept = ''.join(char for char in filename if char in allowed)
    return kept.lower().strip()


# Leftover counter of the former threaded implementation; playlists are now
# fetched one after another, the name is only kept for compatibility.
get_spotify_playlist_threads = 0


def _fetch_playlist_info(playlist_info):
    """Build the playlist description dict for one parsed config entry."""
    if 'user' not in playlist_info:
        # a plain list of track urls, there is nothing to ask Spotify yet
        return {
            'name': 'songs',
            'path': 'songs/',
            'tracks': playlist_info['song_ids'],
            'playlist_id': False,
            'type': 'spotify',
            'user_id': False,
            'user_name': False
        }

    try:
        info = sp.user_playlist(playlist_info['user'], playlist_info['playlist_id'])
    except Exception as e:
        p("\n Failed to get playlist " + playlist_info['playlist_id'] + ' (' + repr(e) + ')')
        sys.exit(1)

    # the display name may be missing, fall back to the owner id
    owner_name = info['owner']['display_name'] or info['owner']['id']
    p('Fetching playlist information for ✔ id:' + owner_name + ' playlist: ' + info['name'])
    path = clean_string(owner_name[:6] + '-' + info['name'])

    return {
        'name': info['name'],
        'path': path + '/',
        'tracks': [],
        'playlist_id': info['id'],
        'type': 'spotify',
        'user_id': info['owner']['id'],
        'user_name': info['owner']['display_name']
    }


def get_spotify_playlist(spotify_playlist):
    """
    Resolve the parsed playlist config entries into playlist description dicts.

    Entries without a 'user' key are track lists and are passed through as a
    pseudo playlist called 'songs'; the others are looked up on Spotify. The
    process exits with status 1 when a playlist cannot be fetched.

    The result is a fresh local list: the previous version stored it in a
    global named like this function, which replaced the function itself after
    the first call.

    :param spotify_playlist: list of dicts as built by parse_spotify_playlist_config
    :return: list of playlist dicts (name, path, tracks, playlist_id, type, user_id, user_name)
    """
    return [_fetch_playlist_info(playlist_info) for playlist_info in spotify_playlist]


def _compose_search_term(term, lim):
    """
    Build a search phrase from `term`: words of one character are dropped,
    the first `lim + 1` remaining words are quoted (strict match), the rest
    are appended as they are.
    """
    composed_terms = []
    index = 0
    for word in term.split(' '):
        if len(word) <= 1:
            continue
        if index <= lim:
            composed_terms.append('"' + word + '"')  # strict search for the leading words
            index += 1
        else:
            composed_terms.append(word)  # not so strict search for later words

    return ' '.join(composed_terms)


def _parse_spotify_track(t):
    """Turn one Spotify track object into the track dict used by the downloader."""
    track_name = t['name']
    artist_name = t['artists'][0]['name']
    album = t['album']

    composed_term = (_compose_search_term(clean_string(artist_name), 2) + ' '
                     + _compose_search_term(clean_string(track_name), 4))

    # prefer the second image in the list, else the only one, else no art
    images = album['images']
    if len(images) > 1:
        image = images[1]['url']
    elif len(images) == 1:
        image = images[0]['url']
    else:
        image = None

    return {
        'name': track_name,
        'search_term': composed_term + ' ' + configs['song_selection']['append_search_term'],
        'artist': artist_name,
        'album': album['name'],
        'path': clean_string(artist_name + '-' + track_name) + '.mp3',
        'number': t['track_number'],
        'id': t['id'],
        'duration': int(t['duration_ms']) / 1000,
        'disc_number': str(t['disc_number']),
        'artist_id': t['artists'][0]['id'],
        'release_date': album['release_date'],
        'album_art': image,
    }


def get_spotify_tracks_individualy(tracks2):
    """
    Fetch and parse the tracks behind a list of Spotify track urls.

    The ids are requested in groups of 10. The last, possibly shorter, group
    is requested as well (it used to be dropped).

    :param tracks2: list of track urls, the id is the last path segment
    :return: list of parsed track dicts
    """
    limit = 10
    p('')
    track_ids = [url.split('?')[0].split('/')[-1] for url in tracks2]
    track_groups = [track_ids[start:start + limit] for start in range(0, len(track_ids), limit)]

    p('Made ' + str(len(track_groups)) + ' track groups from tracks')
    fetched = []
    for group in track_groups:
        p('getting tracks for group')
        fetched.extend(sp.tracks(group)['tracks'])

    # entries that come back empty or without a name are skipped
    return [_parse_spotify_track(t) for t in fetched if t is not None and 'name' in t]


def get_spotify_tracks(user_id, playlist_id):
    """
    Fetch and parse all tracks of a playlist.

    Pages of 100 items are followed with sp.next() until the playlist is
    exhausted. Items whose 'track' is empty or has no name are skipped.

    :param user_id: id of the playlist owner
    :param playlist_id: id of the playlist
    :return: list of parsed track dicts
    """
    parsed_tracks = []
    page = sp.user_playlist_tracks(user_id, playlist_id, None, 100, 0)
    while page:
        for item in page['items']:
            t = item.get('track')
            if t is None or 'name' not in t:
                continue
            parsed_tracks.append(_parse_spotify_track(t))
        page = sp.next(page)  # None when there is no further page

    return parsed_tracks


def _split_playlist_reference(reference):
    """
    Extract (user, playlist_id) from a playlist reference.

    Understands the URI forms spotify:user:<user>:playlist:<id> and
    spotify:playlist:<id>, as well as the https://open.spotify.com/...
    /playlist/<id> url form (with or without a /user/<user> part and a
    ?si=... query). Anything else is taken as a bare playlist id.
    The user is '' when the reference does not contain one.
    """
    bare = reference.strip().split('?', 1)[0]

    if 'playlist:' in bare:
        user = ''
        if 'user:' in bare:
            user = bare[bare.find('user:') + 5:bare.find('playlist:') - 1]
        return user, bare[bare.find('playlist:') + 9:]

    if '/playlist/' in bare:
        head, _, tail = bare.partition('/playlist/')
        user = head.partition('/user/')[2].split('/')[0]
        return user, tail.split('/')[0]

    return '', bare


def parse_spotify_playlist_config():
    """
    Rebuild configs['playlist']['spotify_parsed'] from configs['playlist']['spotify'].

    String entries are playlist references and become
    {'user', 'playlist_id', 'type': 'playlist'}; any other entry is taken as a
    list of track urls and becomes {'type': 'songs_list', 'song_ids': [...]}.
    The parsed list is emptied first, so calling this twice does not duplicate
    the playlists.
    """
    parsed = configs['playlist']['spotify_parsed']
    parsed.clear()

    for pl in configs['playlist']['spotify']:
        if not isinstance(pl, str):
            parsed.append({
                'type': 'songs_list',
                'song_ids': pl,
            })
        else:
            user, pl_id = _split_playlist_reference(pl)
            parsed.append({
                'user': user,
                'playlist_id': pl_id,
                'type': 'playlist',
            })


def process_diff_files(diff, source, dest):
    """
    Apply a diff as produced by diff_files() to the destination directory.

    Files listed in 'files_to_remove' are deleted from `dest` (a folder left
    empty is cleaned up as well); files listed in 'files_to_add' are copied
    from `source` to `dest` unless they already exist there.

    :param diff: dict with the lists 'files_to_remove' and 'files_to_add' (relative paths)
    :param source: directory to copy from, with trailing slash
    :param dest: directory to modify, with trailing slash
    :return: None
    """
    files_to_remove = diff['files_to_remove']
    files_to_add = diff['files_to_add']
    for r in files_to_remove:
        d = dest + r
        try:
            os.remove(d)
            p('Removed file: ' + d)
            remove_dir_if_empty(d[:d.rfind('/')])
        except OSError as e:
            # best effort: report why and carry on with the next file
            p("Hmm could not remove the file or dir: " + repr(e))

    total = len(files_to_add)
    remaining = total  # the progress output counts down
    for f in files_to_add:
        target = dest + f
        dirs = target[:target.rfind('/')]
        if not os.path.exists(dirs + '/'):
            p('Creating folder ' + dirs)
            os.makedirs(dirs, exist_ok=True)
        if not os.path.exists(target):
            if not os.path.exists(source + f):
                p('The source file ' + f + ' does not exists')
            else:
                p('Copying file ' + str(remaining) + '/' + str(total) + ' - ' + target)
                shutil.copyfile(source + f, target)
        else:
            p('Already exists ' + str(remaining) + '/' + str(total) + ' - ' + target)
        remaining -= 1

    p('Files are in sync!')


def remove_dir_if_empty(a):
    """
    Delete the directory `a` when it contains nothing.

    Only `a` itself is removed. os.removedirs() would go on pruning empty
    parent directories and could delete the download or sync root.

    :param a: path of the directory to check
    :return: None
    """
    if not os.listdir(a):
        p('Removing folder because its empty ' + a)
        os.rmdir(a)


def diff_files(files_dir, compare_dir, files=None):
    """
    Compare the wanted files with what is present in `compare_dir`.

    Both directories are expected to hold one level of sub folders with the
    files inside (folder/file). Plain files lying directly in either
    directory are ignored; they used to make os.listdir() fail.

    :param files_dir: directory holding the wanted files, with trailing slash
    :param compare_dir: directory to compare against, with trailing slash
    :param files: optional list of wanted relative paths ('folder/file');
                  when None it is collected from `files_dir`
    :return: dict with 'files_to_remove' (present in compare_dir but not wanted)
             and 'files_to_add' (wanted but missing in compare_dir)
    """
    if files is None:
        files = []
        for d in os.listdir(files_dir):
            if not os.path.isdir(files_dir + d):
                continue
            for f2 in os.listdir(files_dir + d):
                files.append(d + '/' + f2)

    wanted = set(files)  # constant time membership test
    files_to_remove = []
    for entry in os.listdir(compare_dir):
        folder = entry + '/'
        if not os.path.isdir(compare_dir + folder):
            continue
        for df in os.listdir(compare_dir + folder):
            file = folder + df
            if file not in wanted:
                files_to_remove.append(file)

    files_to_add = [f for f in files if not os.path.exists(compare_dir + f)]

    return {
        'files_to_remove': files_to_remove,
        'files_to_add': files_to_add,
    }


# Progress state shared between process_playlist(), its workers and p2().
running_threads = 0
total_playlist_cd = 0
total_playlist = 0
total_tracks_cd = 0
total_tracks = 0


def p2(s):
    """
    Print `s` prefixed with the current playlist and track progress counters.
    :param s: the message text
    """
    progress = 'pl:{}/{}-tracks:{}/{} - '.format(
        total_playlist_cd, total_playlist, total_tracks_cd, total_tracks)
    p(progress + s)


def clean_temp():
    """
    Delete every entry of the working directory whose name contains '.webm'
    (left over temporary downloads).
    """
    p('Cleaning temp')
    for name in os.listdir('./'):
        if '.webm' not in name:
            continue
        p('Removing temp file: ' + name)
        os.remove('./' + name)


process_playlist_threads = 0  # leftover of the threaded playlist scan, stays 0
parsed_playlist = []
hr = '───────────────────'
# guards running_threads / total_tracks_cd, which are changed by worker threads
_counter_lock = threading.Lock()


def _filter_search_results(all_results, track):
    """
    Return the search results that look related to `track`.

    With 'use_filtering' switched off every result is kept. Otherwise a result
    is dropped when too few words of artist + title occur in its title,
    channel and description, or when its title/channel contain an edge case
    word (remix, live, ...) that is not part of the artist/title itself.
    """
    selection = configs['song_selection']
    if not selection['use_filtering']:
        return list(all_results)

    terms = clean_string(track['artist'] + ' ' + track['name'])
    terms_list = [t for t in terms.split(' ') if len(t) > 1]
    required_matches = len(terms_list)

    results = []
    for r in all_results:
        search_in = clean_string(r['title'] + ' ' + r['channel'] + ' ' + r['description']).lower()
        edge_case_search_in = clean_string(r['title'] + ' ' + r['channel']).lower()
        matches = sum(1 for t in terms_list if clean_string(t).lower() in search_in)

        if required_matches < 4:
            # short names: every word has to be there
            unrelated = matches != required_matches
        else:
            # long names: a percentage of the words is enough
            required_words = selection['min_percent_threshold'] * required_matches / 100
            unrelated = matches < round(required_words)

        # detect edge cases here live, instrumental etc
        for e in selection['edge_cases']:
            if e.lower() in edge_case_search_in and e.lower() not in terms:
                unrelated = True
                break

        if not unrelated:
            results.append(r)

    return results


def _select_result(results, track, log):
    """
    Pick, among the first 'diff_track_seconds_limit' results, the one whose
    duration is closest to the track duration.

    :return: tuple (index of the chosen result, duration difference in seconds)
    """
    lowest_index = 0
    lowest_diff = 1000
    limit = configs['song_selection']['diff_track_seconds_limit']
    for index, r in enumerate(results):
        diff = abs(int(r['duration_seconds']) - int(track['duration']))
        if diff < lowest_diff and index < limit:
            lowest_diff = diff
            lowest_index = index

    log(': length diff = ' + str(lowest_diff) + ' seconds')
    log(': selecting = "' + results[lowest_index]['title'] + '"')
    return lowest_index, lowest_diff


def _process_track(pl, folder_path, track, songs_not_found_list):
    """Search, download, convert and tag one track unless its mp3 already exists."""
    pre_text = pl['name'][:10] + ' | ' + track['name']

    def log(message):
        p2(str(running_threads) + 'T | ' + pre_text + message)

    p(hr + ' ' + pre_text)
    log('')
    file_path = folder_path + track['path']
    log(': output to: ' + file_path)
    if os.path.exists(file_path):
        log(': file already exists, skipping')
        return

    search_term = track['search_term']
    log(': searching yt for ' + search_term)
    all_results = search_youtube(search_term)
    log(': got ' + str(len(all_results)) + ' results')

    # have to remove unrelated results, sometimes the duration of an
    # unrelated song matches exactly.
    results = _filter_search_results(all_results, track)
    if not results:
        log(': results were not found')
        songs_not_found_list.append(pre_text + ', term used: ' + track['search_term'])
        return

    result_index, result_diff = _select_result(results, track, log)
    selected_result = results[result_index]
    try:
        log(': downloading audio')
        video_path = download_video(selected_result['video_id'], track['path'])
    except Exception:
        # one more try with the next best result, if there is one
        log(':failed to download, one more try?')
        results.pop(result_index)
        if not results:
            log(':failed to download the song again, giving up!')
            return
        result_index, result_diff = _select_result(results, track, log)
        selected_result = results[result_index]
        log(':could not download video, selecting different one')
        try:
            video_path = download_video(selected_result['video_id'], track['path'])
        except Exception:
            log(':failed to download the song again, giving up!')
            return

    # this was the selected result, it ends up in the mp3 comment tag
    track['selected_result'] = selected_result['video_id'] + ' ' + selected_result['title'] + ' I:' + str(
        result_index) + ' D:' + str(result_diff)

    # several workers may want to create the same playlist folder
    os.makedirs(folder_path, exist_ok=True)

    log(': converting to mp3')
    convert_to_mp3(video_path, file_path)
    time.sleep(.1)
    os.remove(video_path)

    if configs['tag_mp3']:
        log(': downloading album art')
        log(': adding meta-data to mp3')
        tag_mp3(file_path, track)
        log(': saved to ' + file_path)


def _run_track_worker(pl, folder_path, track, songs_not_found_list):
    """Thread entry point: process one track and always release the worker slot."""
    global running_threads
    global total_tracks_cd
    try:
        _process_track(pl, folder_path, track, songs_not_found_list)
    except Exception as e:
        # a failing track must not take the whole run down (or hang it)
        p2(str(running_threads) + 'T | ' + pl['name'][:10] + ' | ' + track['name'] + ': failed with ' + repr(e))
    finally:
        with _counter_lock:
            total_tracks_cd -= 1
            running_threads -= 1


def process_playlist():
    """
    Run a complete sync.

    Parses the playlist config, fetches the playlists and their tracks from
    Spotify, downloads every track that has no mp3 yet (at most
    configs['threads'] at a time), removes mp3 files that are no longer part
    of any playlist and finally mirrors the download dir with sync_drive().
    Exits with status 1 when the download directory is missing.

    The worker slot is taken before a thread is started and released in a
    `finally`, so the wait at the end can neither finish early nor hang when
    a track fails. The list of expected files is filled by this function
    itself, so it is complete before files are removed.
    """
    global total_playlist
    global total_playlist_cd
    global total_tracks
    global total_tracks_cd
    global parsed_playlist
    global process_playlist_threads
    global running_threads

    p('Starting sync')
    parse_spotify_playlist_config()
    p('Download dir: ' + configs['download_dir'])

    if not os.path.exists(configs['download_dir']):
        p('The download directory does not exists')
        sys.exit(1)

    clean_temp()

    p('Getting playlists')
    playlist = get_spotify_playlist(configs['playlist']['spotify_parsed'])

    songs_not_found_list = []
    parsed_playlist = []
    total_playlist = len(playlist)
    total_playlist_cd = total_playlist
    total_tracks = 0
    total_tracks_cd = 0
    process_playlist_threads = 0
    p(hr)
    p('Found ' + str(total_playlist) + ' playlists')

    for pl in playlist:
        if pl['user_id'] is False:
            # a plain list of track urls
            tracks = get_spotify_tracks_individualy(pl['tracks'])
        else:
            tracks = get_spotify_tracks(pl['user_id'], pl['playlist_id'])
        total_tracks += len(tracks)
        p('Got ' + str(len(tracks)) + ' tracks from ' + pl['name'])
        pl['tracks'] = tracks
        parsed_playlist.append(pl)

    p('Playlist scan complete, found ' + str(total_tracks) + ' total tracks')
    p(hr)
    total_tracks_cd = total_tracks

    diff_file_paths = []
    max_workers = max(1, configs['threads'])

    p2('Starting..')

    for pl in parsed_playlist:
        folder_path = configs['download_dir'] + pl['path']
        for track in pl['tracks']:
            diff_file_paths.append(pl['path'] + track['path'])

            # wait for a free worker slot
            while running_threads > max_workers - 1:
                time.sleep(.01)

            with _counter_lock:
                running_threads += 1
            t = threading.Thread(target=_run_track_worker, args=(pl, folder_path, track, songs_not_found_list))
            t.daemon = True
            t.start()

        total_playlist_cd -= 1

    p('Waiting for threads to finish :' + str(running_threads))
    while running_threads != 0:
        print('... Running threads: ' + str(running_threads))
        time.sleep(2)

    p('Checking for removed files')
    diffed_files = diff_files(configs['download_dir'], configs['download_dir'], files=diff_file_paths)

    if diffed_files['files_to_remove']:
        p('Removing files')
        process_diff_files(diffed_files, configs['download_dir'], configs['download_dir'])

    sync_drive()

    p('Songs not found: ' + str(len(songs_not_found_list)))
    for s in songs_not_found_list:
        p('not found: ' + s)

    p('Completed')


def sync_drive():
    """
    Mirror the download directory into every configured sync directory.
    Sync directories that do not exist at the moment are skipped with a message.
    :return: None
    """
    for drive in configs['sync_download_dir']:
        if not os.path.exists(drive):
            p('The path ' + drive + ' does not exists atm, skipping')
            continue

        p('Syncing files with ' + drive)
        changes = diff_files(configs['download_dir'], drive)
        process_diff_files(changes, configs['download_dir'], drive)


# Entry point: act on the command line flags, in this order.

# -d: developer flag, only prints a marker
if args.d:
    print('ok')

# -s: full run (process playlists, download, then sync)
if args.s:
    process_playlist()

# -ds: only sync the already downloaded files with the target drives
if args.ds:
    sync_drive()