diff options
-rw-r--r-- | .gitignore | 4 | ||||
-rwxr-xr-x | update.py | 134 |
2 files changed, 116 insertions, 22 deletions
@@ -1 +1,5 @@ pack-location.txt +geckodriver +geckodriver.exe +geckodriver.log +__pycache__/ @@ -1,5 +1,4 @@ #!/usr/bin/env python3 - import argparse import os import sys @@ -8,6 +7,7 @@ import shutil import re import collections import urllib.parse +import multiprocessing import requests @@ -49,6 +49,10 @@ parser.add_argument("--game-version", VERSION = 0 def read_file(fil): + """ + Given a filename, read its contents in as a list of tuples. + This function strips out comment lines and whitespaces. + """ strings = [] with open(fil) as f: for line in f: @@ -102,32 +106,27 @@ def apply_updates(args): version = tuple(int(x) for x in args.game_version.split('.')) else: version = (2, 0, 0) - print("Populating version File...") + print("Populating version file...") mods = read_file(args.filename) print("Getting new versions of all mods...") - ffx = firefox() + mod_urls = find_updated_urls([x[1] for x in mods], version) + print("Downloading and checksumming all mods...") + checksums = find_checksums(mod_urls) + + # Write information out to version.txt with open(args.version_file, 'w') as f: f.write('# Format: <jarname> <hex digested sha1> <direct download url>\n') f.write("#VERSION " + str(VERSION + 1) + "\n") - for mod in mods: - print("Fetching {mod[0]}...".format(mod=mod)) - if 'curseforge' in mod[1]: - url = find_cdn(ffx, mod[1], version) - else: - url = requests.get(mod[1]).url - if url is None: - print('[!]Failed to fetch {mod[0]}!'.format(mod=mod)) - continue - resp = requests.get(url) - hsh = hashlib.sha1(resp.content).hexdigest() - f.write('{mod[0]} {hsh} {resp.url}\n'.format(mod=mod, hsh=hsh, resp=resp)) - ffx.close() + for name, checksum, url in zip((mod[0] for mod in mods), checksums, mod_urls): + f.write(f'{name} {checksum} {url}\n') + print() print("Done!") print("Updates applied to {args.version_file}".format(args=args)) print("New pack version is " + str(VERSION + 1)) print("[!] No mods were installed. To update your mods folder, run 'update.py install'") + # Find if any updates are available def check_updates(args): if args.game_version is not None: @@ -162,13 +161,83 @@ def check_updates(args): print("Run 'python update.py apply_updates' to create a new version with these updates applied.") +def threaded_find_url(homepage_url, game_version): + """ + Helper function that finds a single mod URL based on the homepage. + """ + if 'curseforge' in homepage_url: + ffx = firefox() + final_url = find_cdn(ffx, homepage_url, game_version) + ffx.close() + else: + final_url = requests.get(homepage_url).url + return final_url + + +def find_updated_urls(forge_urls, game_version, threads=20): + """ + Given a list of mod homepage URLs, find all of their direct download links in parallel. + """ + + # First, check that we can successfully open a Firefox instance in the main thread. + # This provides us with a much nicer error message and quicker feedback. + f = firefox() + f.close() + + with multiprocessing.Pool(threads) as pool: + # No progress indicator possible + # return pool.map(threaded_find_url, forge_urls) + + # Much longer, but allows us to do a nice progress indicator + result_futures = [] + for url in forge_urls: + result_futures.append(pool.apply_async(threaded_find_url, (url, game_version))) + + results = [] + for i,f in enumerate(result_futures): + results.append(f.get()) + print(f'\r{i+1}/{len(result_futures)} URLs updated ({round((i+1)/len(result_futures)*100)}%)', end='') + print() + + return results + + +def threaded_calc_sha1(direct_url): + """ + Helper function that downloads and calculates a single SHA1 hash from a direct download URL. + """ + resp = requests.get(direct_url) + hsh = hashlib.sha1(resp.content).hexdigest() + return hsh + + +def find_checksums(direct_urls, threads=8): + """ + Given a list of direct download URLs, download them all and calculate the SHA1 checksum of the file at that location. + """ + + with multiprocessing.Pool(threads) as pool: + # Much longer, but allows us to do a nice progress indicator + result_futures = [] + for url in direct_urls: + result_futures.append(pool.apply_async(threaded_calc_sha1, (url,))) + + results = [] + for i,f in enumerate(result_futures): + results.append(f.get()) + print(f'\r{i+1}/{len(result_futures)} checksums calculated ({round((i+1)/len(result_futures)*100)}%)', end='') + print() + + return results + + def find_cdn(ffx, url, version): """ Given a mod home URL, finds the most up-to-date mod version compatible with the given game version. Returns the direct Forge CDN download URL """ - #TODO filter mods by forge/fabric compatibility try: + # This goes to the "all files" page, where we get a table view of all ffx.get(url + '/files/all') mod_versions = ffx.find_elements_by_class_name("listing")[0].find_elements_by_xpath("tbody/tr") # extract the table of files from the page row_info = collections.namedtuple("row_info", ["type", "filename", "cdn_id", "game_version"]) # create a custom tuple because data @@ -177,31 +246,52 @@ def find_cdn(ffx, url, version): # parse out the four fields that we use entry_cells = version_entry.find_elements_by_tag_name("td") release_type = entry_cells[0].text + # Note that this is NOT the final filename - this is just the "release name". filename = urllib.parse.quote(entry_cells[1].find_elements_by_tag_name("a")[0].text) try: game_version = tuple([int(x) for x in entry_cells[4].find_element_by_class_name("mr-2").text.split(".")]) # get game version and convert to tuple except: game_version = (0, 0, 0) cdn_id = entry_cells[1].find_element_by_tag_name("a").get_property("href").split("/")[-1] - rows.append(row_info(release_type, filename, cdn_id, game_version)) + + #TODO make this configurable + if 'fabric' not in filename.lower() or 'forge' in filename.lower(): + rows.append(row_info(release_type, filename, cdn_id, game_version)) rows.sort(key=lambda x: x.game_version, reverse=True) best_row = next(x for x in rows if x.game_version <= version) - return f'https://media.forgecdn.net/files/{best_row.cdn_id[:4]}/{best_row.cdn_id[4:]}/{best_row.filename}' + # We need to find the real, ForgeCDN compatible filename now by going to the file page. + ffx.get(f'{url}/files/{best_row.cdn_id}') + # This will probably break in the future + filename = ffx.find_elements_by_xpath("html/body/div/main/div/div/section/div/div/div/section/section/article/div/div/span")[1].text + # URL escape the filename! + filename = urllib.parse.quote(filename) + + # ForgeCDN requires that the leading zeroes are stripped from each portion of the CDN ID, hence the int() cast. + return f'https://media.forgecdn.net/files/{int(best_row.cdn_id[:4])}/{int(best_row.cdn_id[4:])}/{filename}' except: + print(url) + open('temp.txt', 'a').write(url) import traceback; traceback.print_exc() return None def firefox(): - print("Starting Selenium...") + """ + Start a headless Firefox instance and return the Selenium refrence to it. + """ + #print("Starting Selenium...") try: from selenium.webdriver import Firefox + from selenium.webdriver.firefox.options import Options except: print("Applying updates requires the `selenium` package") - os.exit(0) - return Firefox() + exit(0) + options = Options() + options.add_argument('-headless') + options.add_argument('--window-size 1920,1080') + return Firefox(executable_path='./geckodriver', options=options) COMMAND_MAP = { 'install': install, |