restructuring as a library with athena and redshift

This commit is contained in:
Ari Brown 2023-08-01 13:19:57 -04:00
parent adf909608d
commit c32f8359e7
8 changed files with 18 additions and 8 deletions

0
minerva/__init__.py Normal file
View file

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -6,6 +6,7 @@ import pyarrow as pa
import pyarrow.dataset
import pprint
import json
import datetime
pp = pprint.PrettyPrinter(indent=4)
@ -79,6 +80,8 @@ class Query:
local = [self.handler.download(f) for f in files]
self.ds = pa.dataset.dataset(local)
self.runtime = tiedot['UpdatedAt'] - tiedot['CreatedAt']
return self.ds
else:
print("Error:")

View file

@ -5,6 +5,7 @@ import time
import pyarrow as pa
import pyarrow.dataset
import pprint
import datetime
pp = pprint.PrettyPrinter(indent=4)
@ -60,7 +61,8 @@ class Query:
while status in ['QUEUED', 'RUNNING']:
time.sleep(5)
status = self.status()['State']
tiedot = self.info()
status = tiedot['Status']['State']
if status == "SUCCEEDED":
# Because we're using `UNLOAD`, we get a manifest of the files
@ -72,6 +74,9 @@ class Query:
local = [self.handler.download(f) for f in files]
self.ds = pa.dataset.dataset(local)
ms = tiedot['Statistics']['TotalExecutionTimeInMillis']
self.runtime = datetime.timedelta(seconds=ms / 1000)
return self.ds
else:
return status # canceled or error

View file

@ -1,9 +1,9 @@
import minerva as m
import minerva.minerva as a
import pprint
pp = pprint.PrettyPrinter(indent=4)
athena = m.Athena("hay", "s3://haystac-pmo-athena/")
athena = a.Athena("hay", "s3://haystac-pmo-athena/")
#query = athena.query(
#"""SELECT *
#FROM trajectories.kitware
@ -14,7 +14,8 @@ athena = m.Athena("hay", "s3://haystac-pmo-athena/")
#""")
query = athena.query("select count(*) as count from trajectories.kitware")
data = query.results()
pp.pprint(query.info()['Statistics'])
pp.pprint(data.head(10))
print(query.runtime)
# Everything *needs* to have a column in order for parquet to work, so scalar
# values have to be assigned something, so here we use `as count` to create

View file

@ -1,4 +1,4 @@
import blueshift as b
import minerva.blueshift as b
import pprint
pp = pprint.PrettyPrinter(indent=4)
@ -7,7 +7,8 @@ red = b.Redshift("hay", "s3://haystac-pmo-athena/",
db="dev",
cluster="redshift-cluster-1")
query = red.query("select count(*) from myspectrum_schema.kitware")
res = query.results()
pp.pprint(res.head(10))
pp.pprint(query.info())
print(query)
data = query.results()
pp.pprint(data.head(10))
print(query.runtime)