Added parallel file downloading and DRYed up the code a touch

This commit is contained in:
Ari Brown 2023-08-02 15:12:06 -04:00
parent 68bc346e24
commit 22746b6639
5 changed files with 16 additions and 11 deletions

View file

@@ -6,6 +6,7 @@ import pyarrow as pa
import pyarrow.dataset
import pprint
import datetime
import json
from minerva import parallel_map
pp = pprint.PrettyPrinter(indent=4)
@@ -66,8 +67,10 @@ class Execute:
return self.info_cache
def finish(self):
while stat := self.status() in ['SUBMITTED', 'PICKED', 'STARTED']:
stat = self.status()
while stat in ['SUBMITTED', 'PICKED', 'STARTED']:
time.sleep(5)
stat = self.status()
return stat # finalized state
@@ -87,9 +90,9 @@ class Query(Execute):
def manifest_files(self):
status = self.finish()
if status == "SUCCEEDED":
if status == "FINISHED":
# Track the runtime
self.runtime = tiedot['UpdatedAt'] - tiedot['CreatedAt']
self.runtime = self.info_cache['UpdatedAt'] - self.info_cache['CreatedAt']
# Because we're using `UNLOAD`, we get a manifest of the files
# that make up our data.