# settings.py

# -*- coding: utf-8 -*-

import os
import time
import operator

# Name of the default, user-editable settings file. Open that file to change the script
# settings; DO NOT change the default values defined below in this module
DEFAULT_SETTINGS_FILENAME = 'settings.txt'

"""
Default settings. Note that these are overridden by the default settings file
"""
# The Python type of each default value (bool, int, or str) decides how readSettings()
# parses the matching "Option=value" line, so keep the defaults correctly typed
settings = {
    # Reddit authentication information
    'Username' : '',
    'Password' : '',
    'Client_id' : '',
    'Client_secret' : '',

    'Reddit_Enabled' : True,
    'Reddit_Save_Liked' : True,
    'Reddit_Save_Saved' : True,
    'Reddit_Save_Comments' : True,
    'Reddit_Unlike_Liked': False,
    'Reddit_Unsave_Saved': False,
    'Reddit_Save_Your_User_Posts': True,

    # Total requests to reddit (actual results may vary)
    'Reddit_Total_requests' : 500,

    # Imgur authentication information
    'Imgur_Enabled' : True,
    'Imgur_client_id' : '',
    'Imgur_client_secret' : '',

    # Disable downloading albums by default.
    'Should_download_albums' : False,

    # If true, do not download single images, only submissions which are imgur albums
    'Only_download_albums' : False,

    # Tumblr authentication information
    'Tumblr_Enabled' : True,
    'Tumblr_Client_id' : '',
    'Tumblr_Client_secret' : '',
    'Tumblr_Client_token' : '',
    'Tumblr_Client_token_secret' : '',

    # Total requests to Tumblr
    'Tumblr_Total_requests' : 500,

    # Gfycat authentication information
    # https://developers.gfycat.com/signup/#/apiform
    # Requires https://github.com/ankeshanand/py-gfycat
    'Gfycat_Client_id' : '',
    'Gfycat_Client_secret' : '',

    # Pixiv
    'Pixiv_Enabled' : True,
    'Pixiv_username': '',
    'Pixiv_password': '',

    # Pinterest
    'Pinterest_Enabled' : True,
    'Pinterest_email': '',
    'Pinterest_username': '',
    'Pinterest_password': '',
    'Pinterest_Try_Request_Only_New': True,

    # Youtube DL settings
    'Should_download_videos' : True,
    'Should_download_youtube_videos' : True,
    'Only_download_videos' : False,

    # Don't get new stuff, just use the cache files from last run
    'Use_cached_submissions' : False,
    'Reddit_cache_file' : 'Reddit_SubmissionCache.bin',
    'Tumblr_cache_file' : 'Tumblr_SubmissionCache.bin',
    'Pixiv_cache_file' : 'Pixiv_SubmissionCache.bin',

    # Attempt to only request and download new submissions (those which haven't been downloaded)
    # This uses the Reddit and Tumblr cache files to know what's already been downloaded,
    #  so it will only work if you've successfully run the script before
    'Reddit_Try_Request_Only_New' : True,
    'Tumblr_Try_Request_Only_New' : True,
    'Pixiv_Try_Request_Only_New' : True,
    'Reddit_Try_Request_Only_New_Saved_Cache_File' : 'Reddit_RequestOnlyNewSaved.bin',
    'Reddit_Try_Request_Only_New_Liked_Cache_File' : 'Reddit_RequestOnlyNewLiked.bin',
    'Tumblr_Try_Request_Only_New_Cache_File' : 'Tumblr_RequestOnlyNew.bin',
    'Pixiv_Try_Request_Only_New_Cache_File' : 'Pixiv_RequestOnlyNew.bin',
    'Pixiv_Try_Request_Only_New_Private_Cache_File' : 'Pixiv_RequestOnlyNewPrivate.bin',
    'Pinterest_Try_Request_Only_New_Cache_File' : 'Pinterest_RequestOnlyNew.bin',

    # If the script failed at say 70%, you could toggle Use_cached_submissions and set this value to
    #  69. The script would then restart 69% of the way into the cached submissions nearer to where you
    #  left off.
    # The reason why this isn't default is because there might have been changes to the script which
    #  made previous submissions successfully download, so we always re-check submissions
    'Skip_n_percent_submissions': 0,

    # If True, don't actually download the images - just pretend to
    'Should_soft_retrieve' : False,

    # If True, output minimal information to the console
    'Only_important_messages' : False,

    'Output_dir' : 'output',
    'Metadata_output_dir' : 'metadata',

    'Database' : 'LikedSaved.db',
    # These are gross: for existing output directories, store whether the user has updated their
    # database from the JSON files with the new features. These will automatically set themselves
    'Database_Has_Imported_Unsupported_Submissions' : False,
    'Database_Has_Imported_All_Submissions' : False,
    'Database_Has_Imported_Comments' : False,

    # Port number the server will listen on
    'Port' : 8888,
    'Launch_Browser_On_Startup' : True
}

# HTML help text attached as the comment for the 'Client_secret' option in settingsStructure.
# NOTE(review): the fragment closes a </p> it never opened and leaves its last <p> open, so it
# presumably gets wrapped in <p class="optionComment">...</p> by whatever renders it - confirm
redditClientSecretInstructions = '''You need OAuth tokens to run the script. To get them follow these steps:</p>
    <ol>
        <li>Go to <a href="https://www.reddit.com/prefs/apps/">Reddit app preferences</a> (while signed in to reddit)</li>
        <li>Scroll down to the bottom and click "create app" (something like that)</li>
        <li>Fill in the fields as such:</li>
            <ul>
                <li><b>name:</b> Content Collector</li>
                <li>Choose <b>"script"</b> as the type</li>
                <li><b>about url:</b> https://github.com/makuto/Liked-Saved-Image-Downloader</li>
                <li><b>redirect uri:</b> http://localhost:8080</li>
            </ul>
        <li>Click create app</li>
        <li>Copy the text which is right below "personal use script" for Client ID</li>
        <li>Copy the secret for Client Secret as well</li>
    </ol>
    <p class="optionComment">Yes, this is painful, but it's for your own security
'''

# HTML help text attached as the comment for the 'Tumblr_Client_token_secret' option in
# settingsStructure. Like the reddit instructions, it closes and reopens a <p> element
# NOTE(review): the text ends with an unmatched ")" - confirm before changing a user-facing string
tumblrClientSecretInstructions = '''
Follow the same procedure as reddit for Tumblr:</p>
    <ol>
        <li><a href="https://www.tumblr.com/oauth/apps">Register the app</a></li>
        <li>Then go <a href="https://api.tumblr.com/console">here</a> to get your tokens</li>
    </ol>
<p class="optionComment">Refer to the <a href="https://github.com/tumblr/pytumblr">PyTumblr page</a> for more details).
'''

# Help text shared by the 'Reddit_Total_requests' and 'Tumblr_Total_requests' options
requestsInstructions = 'Increase this value to get more submissions'

# This provides metadata to create nice sections for the web interface.
# Each entry is a ('header', [options]) tuple, where every option is either a bare settings
# key or a ('settings_key', 'comment') tuple. Comment strings may contain HTML markup:
# [('header', ['option_to_include', ('another_option', 'this one has a comment')])]
settingsStructure = [
    ('Output',
     [('Output_dir', 'All images, videos, and comments will be saved to this directory.'
       ' You will have to restart the server whenever you change this value when using the Random Image Browser'),
      ('Metadata_output_dir',
       'Save JSON files with content metadata (source, author, URL, etc.) in this directory')]),

    ('Reddit Authentication',
     ['Reddit_Enabled',
      'Username',
      'Password',
      'Client_id',
      ('Client_secret', redditClientSecretInstructions)]),

    ('Reddit Settings',
     [('Reddit_Total_requests', requestsInstructions),
      'Reddit_Save_Liked',
      'Reddit_Save_Saved',
      'Reddit_Save_Comments',
      ('Reddit_Unlike_Liked', 'Unlike/remove upvote after the submission has been recorded'),
      ('Reddit_Unsave_Saved', 'Unsave submission after it has been recorded'),
      ('Reddit_Try_Request_Only_New',
       "Attempt to only request and download new submissions (those which haven't been downloaded). "
       "This uses the Reddit cache files to know what's already been downloaded, so it will only "
       "work if you've successfully run the script before"),
      ('Reddit_Save_Your_User_Posts',
       'Save posts with the same author as the current user. Disabling this will cause posts with'
       ' your username as author to be ignored.')]),

    ('Imgur Authentication',
     ['Imgur_Enabled',
      'Imgur_client_id',
      ('Imgur_client_secret',"These need to be filled in so that the script can download Imgur "
       "albums. If not filled in, imgur albums will be ignored. Single images will still be "
       "downloaded. If you want to use Imgur, sign in to Imgur, then go "
       "<a href=\"https://api.imgur.com/oauth2/addclient\">here</a> and create your new client.")]),

    ('Gfycat Authentication',
     ['Gfycat_Client_id',
      ('Gfycat_Client_secret', "These need to be filled in so that the script can download Gfycat"
       " media. If not filled in, many Gfycat links will fail to download."
       " Go <a href=\"https://developers.gfycat.com/signup/#/apiform\">here</a> to get your API keys.")]),

    ('Tumblr Authentication',
     ['Tumblr_Enabled',
      'Tumblr_Client_id',
      'Tumblr_Client_secret',
      'Tumblr_Client_token',
      ('Tumblr_Client_token_secret', tumblrClientSecretInstructions)]),

    ('Tumblr Settings',
     [('Tumblr_Total_requests', requestsInstructions),
      ('Tumblr_Try_Request_Only_New',
       "Attempt to only request and download new submissions (those which haven't been downloaded) "
       "This uses the Tumblr cache files to know what's already been downloaded, so it will only "
       "work if you've successfully run the script before")]),

    ('Pixiv Authentication',
     ['Pixiv_Enabled',
      'Pixiv_username',
      'Pixiv_password']),

    ('Pixiv Settings',
     [('Pixiv_Try_Request_Only_New',
       "Attempt to only request and download new submissions (those which haven't been downloaded) "
       "This uses the Pixiv cache files to know what's already been downloaded, so it will only "
       "work if you've successfully run the script before")]),

    ('Pinterest Authentication',
     ['Pinterest_Enabled',
      'Pinterest_email',
      ('Pinterest_username', 'Look in the pinterest url while visiting your profile'),
      'Pinterest_password']),

    ('Pinterest Settings',
     [('Pinterest_Try_Request_Only_New',
       "Attempt to only request and download new submissions (those which haven't been downloaded) "
       "This uses the Pinterest cache files to know what's already been downloaded, so it will only "
       "work if you've successfully run the script before")]),

    ('Download Settings',
     [
         'Should_download_albums',

         ('Only_download_albums',
          'If true, do not download single images, only submissions which are imgur albums'),

         ('Should_download_videos', 'Use <a href="https://github.com/ytdl-org/youtube-dl/">Youtube-dl</a> '
          'to attempt to download videos'),
         ('Should_download_youtube_videos',
          'If <b>Should download videos</b>, whether or not to download YouTube videos'),
         ('Only_download_videos', 'Do not download any images, only supported videos')
     ]),

    ("Server Settings",
     [
        ('Port', 'The port number the server will listen on. Note that ports 80 (HTTP default) and 443 (HTTPS'
         ' default) require the server to be run as root. <b>You must restart the server for this change to take effect.</b>'),

        ('Launch_Browser_On_Startup', 'Open default browser to localhost:port once the server has started'),

        ('Database', "Location of the database"),

        ('Database_Has_Imported_Unsupported_Submissions',
         'Setting this to false will cause a reimport of all <i>UnsupportedSubmissions</i> json files in both '
         '<b>Output dir</b> and <b>Metadata output dir</b>. This value is automatically set to True after '
         'a successful import.'),
        ('Database_Has_Imported_All_Submissions',
         'Setting this to false will cause a reimport of all <i>AllSubmissions</i> json files in both '
         '<b>Output dir</b> and <b>Metadata output dir</b>. This value is automatically set to True after '
         'a successful import.'),
        ('Database_Has_Imported_Comments',
         'Setting this to false will cause a reimport of all <i>Comments</i> json files in both '
         '<b>Output dir</b> and <b>Metadata output dir</b> This value is automatically set to True after '
         'a successful import.')
     ]),

    ('Debugging and Development',
    [
        ('Only_important_messages', 'Output minimal information to the console'),
        ('Skip_n_percent_submissions', "If the script failed at say 70%, you could use toggle "
         "Use_cached_submissions and set this value to 69. The script would then restart 69% of"
         " the way into the cached submissions nearer to where you left off. The reason why this"
         " isn't default is because there might have been changes to the script which made"
         " previous submissions successfully download, so we always re-check submissions"),

        ('Use_cached_submissions', 'Do not get new stuff, just use the cache files from last run'),
        'Reddit_cache_file',
        'Tumblr_cache_file',
        'Pixiv_cache_file',
        'Reddit_Try_Request_Only_New_Saved_Cache_File',
        'Reddit_Try_Request_Only_New_Liked_Cache_File',
        'Tumblr_Try_Request_Only_New_Cache_File',
        'Pixiv_Try_Request_Only_New_Cache_File',
        'Pixiv_Try_Request_Only_New_Private_Cache_File',
        'Pinterest_Try_Request_Only_New_Cache_File',

        ('Should_soft_retrieve', "If True, don't actually download the images - just pretend to"),
    ]),
]

def valueAfterTag(line, optionTag):
    """Return the part of `line` that follows the option name and its separator.

    The first len(optionTag) + 1 characters are dropped (the tag plus one separator
    character, which is not validated here), then spaces, tabs, and newlines are
    stripped from both ends of the remainder.
    """
    valueStart = len(optionTag) + 1
    rawValue = line[valueStart:]
    return rawValue.strip(' \t\n')

def lineHasOption(line, optionTag):
    """Return True if `line` begins with `optionTag` immediately followed by '='.

    The comparison ignores case. The tag must start at the very first character of
    the line and no whitespace is tolerated before the '='.
    """
    expectedPrefix = optionTag.lower() + '='
    actualPrefix = line[:len(optionTag) + 1].lower()
    return actualPrefix == expectedPrefix

def getBooleanOption(line, optionTag):
    """Parse the boolean value of `optionTag` from a settings-file `line`.

    Returns True only when the line is for this option and its value is 'true'
    (any letter case) or '1'. Every other value, and any line that is not for this
    option, yields False.
    """
    if not lineHasOption(line, optionTag):
        return False

    normalizedValue = valueAfterTag(line, optionTag).lower()
    return normalizedValue in ('true', '1')

def getStringOption(line, optionTag):
    """Return the stripped text value of `optionTag` from a settings-file `line`.

    An empty string is returned when the line is not for this option.
    """
    if not lineHasOption(line, optionTag):
        return ''

    return valueAfterTag(line, optionTag)

def getIntegerOption(line, optionTag):
    """Parse the integer value of `optionTag` from a settings-file `line`.

    Returns -1 when the line is not for this option.

    Raises:
        ValueError: if the line is for this option but its value is not a valid
            integer literal (including an empty value).
    """
    if not lineHasOption(line, optionTag):
        return -1

    textValue = valueAfterTag(line, optionTag)
    return int(textValue)

def readSettings(settingsFileName):
    """Load option values from `settingsFileName` into the module-level `settings` dict.

    The file is expected to hold one "Option=value" entry per line. Blank lines and
    lines whose first character is '#' are skipped, as are lines which do not match
    any known option. The type of the option's current value in `settings` (bool,
    int, or str) selects how the text is parsed.

    An integer option with a malformed value (e.g. "Port=" or "Port=abc") keeps its
    current value and a warning is printed, rather than aborting the whole load.

    Raises:
        IOError/OSError: if the file cannot be opened or read.
    """
    # `with` guarantees the handle is closed even if reading fails part-way
    with open(settingsFileName, 'r') as settingsFile:
        lines = settingsFile.readlines()

    for line in lines:
        # Ignore blank or commented lines
        if not line.strip(' \t\n') or line[0] == '#':
            continue

        for option in settings:
            if not lineHasOption(line, option):
                continue

            currentValue = settings[option]

            # bool has to be tested before int because bool is a subclass of int
            if isinstance(currentValue, bool):
                settings[option] = getBooleanOption(line, option)
                break

            elif isinstance(currentValue, int):
                try:
                    settings[option] = getIntegerOption(line, option)
                except ValueError:
                    # Keep the previous value instead of crashing on a bad number
                    print("Warning: ignoring invalid integer value for setting '{}': '{}'"
                          .format(option, valueAfterTag(line, option)))
                break

            elif isinstance(currentValue, str):
                settings[option] = getStringOption(line, option)
                break

def hasRedditSettings():
    """Tell whether Reddit is enabled and all of its credentials are filled in.

    The result is meant to be used for its truthiness: it is the first falsy value
    among the checked settings, or the value of the last one if all are truthy.
    """
    requiredOptions = ('Reddit_Enabled', 'Username', 'Password', 'Client_id', 'Client_secret')

    value = None
    for option in requiredOptions:
        value = settings[option]
        # Stop at the first missing/disabled setting, like a chained `and`
        if not value:
            break
    return value

def hasTumblrSettings():
    """Tell whether Tumblr is enabled and all of its credentials are filled in.

    The result is meant to be used for its truthiness: it is the first falsy value
    among the checked settings, or the value of the last one if all are truthy.
    """
    requiredOptions = ('Tumblr_Enabled', 'Tumblr_Client_id', 'Tumblr_Client_secret',
                       'Tumblr_Client_token', 'Tumblr_Client_token_secret')

    value = None
    for option in requiredOptions:
        value = settings[option]
        # Stop at the first missing/disabled setting, like a chained `and`
        if not value:
            break
    return value

def hasImgurSettings():
    """Tell whether Imgur is enabled and both of its client credentials are filled in.

    The result is meant to be used for its truthiness: it is the first falsy value
    among the checked settings, or the value of the last one if all are truthy.
    """
    requiredOptions = ('Imgur_Enabled', 'Imgur_client_id', 'Imgur_client_secret')

    value = None
    for option in requiredOptions:
        value = settings[option]
        # Stop at the first missing/disabled setting, like a chained `and`
        if not value:
            break
    return value

def hasPixivSettings():
    """Tell whether Pixiv is enabled and its username and password are filled in.

    The result is meant to be used for its truthiness: it is the first falsy value
    among the checked settings, or the value of the last one if all are truthy.
    """
    requiredOptions = ('Pixiv_Enabled', 'Pixiv_username', 'Pixiv_password')

    value = None
    for option in requiredOptions:
        value = settings[option]
        # Stop at the first missing/disabled setting, like a chained `and`
        if not value:
            break
    return value

def hasPinterestSettings():
    """Tell whether Pinterest is enabled and its username, password, and email are filled in.

    The result is meant to be used for its truthiness: it is the first falsy value
    among the checked settings, or the value of the last one if all are truthy.
    """
    requiredOptions = ('Pinterest_Enabled', 'Pinterest_username', 'Pinterest_password',
                       'Pinterest_email')

    value = None
    for option in requiredOptions:
        value = settings[option]
        # Stop at the first missing/disabled setting, like a chained `and`
        if not value:
            break
    return value

# Personal settings file. To make sure I don't accidentally commit my settings.txt,
# it's marked LOCAL_, which is in .gitignore
hiddenSettingsFilename = "LOCAL_settings.txt"

# Written by writeServerSettings(). Not intended to be edited by a human, and
# definitely shouldn't be checked in
serverSettingsFilename = 'LOCAL_settings_from_server.txt'

# Returns which settings file should be used
def getSettingsFilename():
    """Return which settings file should be used.

    The candidates are the hidden (LOCAL_) settings file, the server-written settings
    file, and the default settings file. Of those which exist, the most recently
    modified one wins; on a tie the earlier one in that order is chosen. If none
    exist, the current settings are written out via writeServerSettings() and that
    new file is used.

    Returns:
        The chosen filename, or None if no settings file exists and one could not
        be created.
    """
    knownFilenames = (hiddenSettingsFilename, serverSettingsFilename, DEFAULT_SETTINGS_FILENAME)
    candidates = [fileName for fileName in knownFilenames if os.path.isfile(fileName)]

    # No settings files at all; create one
    if not candidates:
        writeServerSettings()
        if not os.path.isfile(serverSettingsFilename):
            print("Error: can't seem to create settings file")
            return None
        candidates.append(serverSettingsFilename)

    # Choose the most recently edited file. The modification time is used rather than
    # getctime() because ctime is the creation time on Windows and the last metadata
    # change on Unix, neither of which means "last edited"
    return max(candidates, key=os.path.getmtime)

def writeServerSettings():
    """Write every entry of the module-level `settings` dict to the server settings file.

    The file (serverSettingsFilename) is overwritten with one "Option=value" line per
    setting, which is the format readSettings() parses. Booleans are written as
    'True'/'False'.

    Raises:
        IOError/OSError: if the file cannot be opened or written.
    """
    # str.format() already renders booleans as 'True'/'False', so no special case is needed
    settingsOutput = ['{}={}\n'.format(option, optionValue)
                      for option, optionValue in settings.items()]

    # `with` guarantees the handle is closed (and flushed) even if the write fails
    with open(serverSettingsFilename, 'w') as serverSettings:
        serverSettings.writelines(settingsOutput)

    print('Wrote settings to ' + serverSettingsFilename)

def getSettings():
    """Pick the settings file to use and load it into the module-level `settings` dict.

    The file is selected by getSettingsFilename(). If that returns None (no settings
    file exists and none could be created), the default values already in `settings`
    are left untouched.
    """
    settingsFilename = getSettingsFilename()

    # Without this guard the string concatenation below fails with an unrelated TypeError
    if settingsFilename is None:
        print('No settings file could be found or created; using the default settings')
        return

    print('Reading settings from settings file with most recent timestamp, which was:\n'
          + settingsFilename
          + "\nIf you want to read from a different settings file, make it more recent")
    readSettings(settingsFilename)