diff options
| author | Dylan Jones <dylanjones2011@gmail.com> | 2020-11-25 22:30:20 -0500 | 
|---|---|---|
| committer | Dylan Jones <dylanjones2011@gmail.com> | 2020-11-25 22:30:20 -0500 | 
| commit | 2c29fbad324c826e118be42e510a8388e4648ef5 (patch) | |
| tree | 2255f83ccb47235cc64cf0b319eb1a5439549e2b | |
| parent | 407d3016088d756020662cd9e2c44089ada6aefb (diff) | |
| download | modpackman-2c29fbad324c826e118be42e510a8388e4648ef5.tar.gz modpackman-2c29fbad324c826e118be42e510a8388e4648ef5.zip | |
Parallelize and fix more bugs
| -rw-r--r-- | .gitignore | 4 | ||||
| -rwxr-xr-x | update.py | 134 | 
2 files changed, 116 insertions, 22 deletions
```diff
@@ -1 +1,5 @@
 pack-location.txt
+geckodriver
+geckodriver.exe
+geckodriver.log
+__pycache__/
@@ -1,5 +1,4 @@
 #!/usr/bin/env python3
-
 import argparse
 import os
 import sys
@@ -8,6 +7,7 @@ import shutil
 import re
 import collections
 import urllib.parse
+import multiprocessing
 import requests
@@ -49,6 +49,10 @@ parser.add_argument("--game-version",
 VERSION = 0
 def read_file(fil):
+    """
+    Given a filename, read its contents in as a list of tuples.
+    This function strips out comment lines and whitespaces.
+    """
     strings = []
     with open(fil) as f:
         for line in f:
@@ -102,32 +106,27 @@ def apply_updates(args):
         version = tuple(int(x) for x in args.game_version.split('.'))
     else:
         version = (2, 0, 0)
-    print("Populating version File...")
+    print("Populating version file...")
     mods = read_file(args.filename)
     print("Getting new versions of all mods...")
-    ffx = firefox()
+    mod_urls = find_updated_urls([x[1] for x in mods], version)
+    print("Downloading and checksumming all mods...")
+    checksums = find_checksums(mod_urls)
+
+    # Write information out to version.txt
     with open(args.version_file, 'w') as f:
         f.write('# Format: <jarname> <hex digested sha1> <direct download url>\n')
         f.write("#VERSION " + str(VERSION + 1) + "\n")
-        for mod in mods:
-            print("Fetching {mod[0]}...".format(mod=mod))
-            if 'curseforge' in mod[1]:
-                url = find_cdn(ffx, mod[1], version)
-            else:
-                url = requests.get(mod[1]).url
-            if url is None:
-                print('[!]Failed to fetch {mod[0]}!'.format(mod=mod))
-                continue
-            resp = requests.get(url)
-            hsh = hashlib.sha1(resp.content).hexdigest()
-            f.write('{mod[0]} {hsh} {resp.url}\n'.format(mod=mod, hsh=hsh, resp=resp))
-    ffx.close()
+        for name, checksum, url in zip((mod[0] for mod in mods), checksums, mod_urls):
+            f.write(f'{name} {checksum} {url}\n')
+
     print()
     print("Done!")
     print("Updates applied to {args.version_file}".format(args=args))
     print("New pack version is " + str(VERSION + 1))
     print("[!] No mods were installed. To update your mods folder, run 'update.py install'")
+
 # Find if any updates are available
 def check_updates(args):
     if args.game_version is not None:
@@ -162,13 +161,83 @@ def check_updates(args):
         print("Run 'python update.py apply_updates' to create a new version with these updates applied.")
+def threaded_find_url(homepage_url, game_version):
+    """
+    Helper function that finds a single mod URL based on the homepage.
+    """
+    if 'curseforge' in homepage_url:
+        ffx = firefox()
+        final_url = find_cdn(ffx, homepage_url, game_version)
+        ffx.close()
+    else:
+        final_url = requests.get(homepage_url).url
+    return final_url
+
+
+def find_updated_urls(forge_urls, game_version, threads=20):
+    """
+    Given a list of mod homepage URLs, find all of their direct download links in parallel.
+    """
+
+    # First, check that we can successfully open a Firefox instance in the main thread.
+    # This provides us with a much nicer error message and quicker feedback.
+    f = firefox()
+    f.close()
+
+    with multiprocessing.Pool(threads) as pool:
+        # No progress indicator possible
+        # return pool.map(threaded_find_url, forge_urls)
+
+        # Much longer, but allows us to do a nice progress indicator
+        result_futures = []
+        for url in forge_urls:
+            result_futures.append(pool.apply_async(threaded_find_url, (url, game_version)))
+
+        results = []
+        for i,f in enumerate(result_futures):
+            results.append(f.get())
+            print(f'\r{i+1}/{len(result_futures)} URLs updated ({round((i+1)/len(result_futures)*100)}%)', end='')
+        print()
+
+        return results
+
+
+def threaded_calc_sha1(direct_url):
+    """
+    Helper function that downloads and calculates a single SHA1 hash from a direct download URL.
+    """
+    resp = requests.get(direct_url)
+    hsh = hashlib.sha1(resp.content).hexdigest()
+    return hsh
+
+
+def find_checksums(direct_urls, threads=8):
+    """
+    Given a list of direct download URLs, download them all and calculate the SHA1 checksum of the file at that location.
+    """
+    
+    with multiprocessing.Pool(threads) as pool:
+        # Much longer, but allows us to do a nice progress indicator
+        result_futures = []
+        for url in direct_urls:
+            result_futures.append(pool.apply_async(threaded_calc_sha1, (url,)))
+
+        results = []
+        for i,f in enumerate(result_futures):
+            results.append(f.get())
+            print(f'\r{i+1}/{len(result_futures)} checksums calculated ({round((i+1)/len(result_futures)*100)}%)', end='')
+        print()
+
+        return results
+
+
 def find_cdn(ffx, url, version):
     """
     Given a mod home URL, finds the most up-to-date mod version compatible with the given game version.
     Returns the direct Forge CDN download URL
     """
-    #TODO filter mods by forge/fabric compatibility
     try:
+        # This goes to the "all files" page, where we get a table view of all 
         ffx.get(url + '/files/all')
         mod_versions = ffx.find_elements_by_class_name("listing")[0].find_elements_by_xpath("tbody/tr") # extract the table of files from the page
         row_info = collections.namedtuple("row_info", ["type", "filename", "cdn_id", "game_version"]) # create a custom tuple because data
@@ -177,31 +246,52 @@ def find_cdn(ffx, url, version):
             # parse out the four fields that we use
             entry_cells = version_entry.find_elements_by_tag_name("td")
             release_type = entry_cells[0].text
+            # Note that this is NOT the final filename - this is just the "release name".
             filename = urllib.parse.quote(entry_cells[1].find_elements_by_tag_name("a")[0].text)
             try:
                 game_version = tuple([int(x) for x in entry_cells[4].find_element_by_class_name("mr-2").text.split(".")]) # get game version and convert to tuple
             except:
                 game_version = (0, 0, 0)
             cdn_id = entry_cells[1].find_element_by_tag_name("a").get_property("href").split("/")[-1]
-            rows.append(row_info(release_type, filename, cdn_id, game_version))
+
+            #TODO make this configurable
+            if 'fabric' not in filename.lower() or 'forge' in filename.lower():
+                rows.append(row_info(release_type, filename, cdn_id, game_version))
         rows.sort(key=lambda x: x.game_version, reverse=True)
         best_row = next(x for x in rows if x.game_version <= version)
-        return f'https://media.forgecdn.net/files/{best_row.cdn_id[:4]}/{best_row.cdn_id[4:]}/{best_row.filename}'
+        # We need to find the real, ForgeCDN compatible filename now by going to the file page.
+        ffx.get(f'{url}/files/{best_row.cdn_id}')
+        # This will probably break in the future
+        filename = ffx.find_elements_by_xpath("html/body/div/main/div/div/section/div/div/div/section/section/article/div/div/span")[1].text
+        # URL escape the filename!
+        filename = urllib.parse.quote(filename)
+
+        # ForgeCDN requires that the leading zeroes are stripped from each portion of the CDN ID, hence the int() cast.
+        return f'https://media.forgecdn.net/files/{int(best_row.cdn_id[:4])}/{int(best_row.cdn_id[4:])}/{filename}'
     except:
+        print(url)
+        open('temp.txt', 'a').write(url)
         import traceback; traceback.print_exc()
         return None
 def firefox():
-    print("Starting Selenium...")
+    """
+    Start a headless Firefox instance and return the Selenium refrence to it.
+    """
+    #print("Starting Selenium...")
     try:
         from selenium.webdriver import Firefox
+        from selenium.webdriver.firefox.options import Options
     except:
         print("Applying updates requires the `selenium` package")
-        os.exit(0)
-    return Firefox()
+        exit(0)
+    options = Options()
+    options.add_argument('-headless')
+    options.add_argument('--window-size 1920,1080')
+    return Firefox(executable_path='./geckodriver', options=options)
 COMMAND_MAP = {
     'install': install,
```
