forked from bellwether/minerva
Restructuring as a library with Athena and Redshift
This commit is contained in:
parent
adf909608d
commit
c32f8359e7
8 changed files with 18 additions and 8 deletions
0
minerva/__init__.py
Normal file
0
minerva/__init__.py
Normal file
BIN
minerva/__pycache__/__init__.cpython-310.pyc
Normal file
BIN
minerva/__pycache__/__init__.cpython-310.pyc
Normal file
Binary file not shown.
BIN
minerva/__pycache__/blueshift.cpython-310.pyc
Normal file
BIN
minerva/__pycache__/blueshift.cpython-310.pyc
Normal file
Binary file not shown.
BIN
minerva/__pycache__/minerva.cpython-310.pyc
Normal file
BIN
minerva/__pycache__/minerva.cpython-310.pyc
Normal file
Binary file not shown.
|
|
@ -6,6 +6,7 @@ import pyarrow as pa
|
|||
import pyarrow.dataset
|
||||
import pprint
|
||||
import json
|
||||
import datetime
|
||||
|
||||
pp = pprint.PrettyPrinter(indent=4)
|
||||
|
||||
|
|
@ -79,6 +80,8 @@ class Query:
|
|||
local = [self.handler.download(f) for f in files]
|
||||
self.ds = pa.dataset.dataset(local)
|
||||
|
||||
self.runtime = tiedot['UpdatedAt'] - tiedot['CreatedAt']
|
||||
|
||||
return self.ds
|
||||
else:
|
||||
print("Error:")
|
||||
|
|
@ -5,6 +5,7 @@ import time
|
|||
import pyarrow as pa
|
||||
import pyarrow.dataset
|
||||
import pprint
|
||||
import datetime
|
||||
|
||||
pp = pprint.PrettyPrinter(indent=4)
|
||||
|
||||
|
|
@ -60,7 +61,8 @@ class Query:
|
|||
|
||||
while status in ['QUEUED', 'RUNNING']:
|
||||
time.sleep(5)
|
||||
status = self.status()['State']
|
||||
tiedot = self.info()
|
||||
status = tiedot['Status']['State']
|
||||
|
||||
if status == "SUCCEEDED":
|
||||
# Because we're using `UNLOAD`, we get a manifest of the files
|
||||
|
|
@ -72,6 +74,9 @@ class Query:
|
|||
local = [self.handler.download(f) for f in files]
|
||||
self.ds = pa.dataset.dataset(local)
|
||||
|
||||
ms = tiedot['Statistics']['TotalExecutionTimeInMillis']
|
||||
self.runtime = datetime.timedelta(seconds=ms / 1000)
|
||||
|
||||
return self.ds
|
||||
else:
|
||||
return status # canceled or error
|
||||
7
test.py
7
test.py
|
|
@ -1,9 +1,9 @@
|
|||
import minerva as m
|
||||
import minerva.minerva as a
|
||||
import pprint
|
||||
|
||||
pp = pprint.PrettyPrinter(indent=4)
|
||||
|
||||
athena = m.Athena("hay", "s3://haystac-pmo-athena/")
|
||||
athena = a.Athena("hay", "s3://haystac-pmo-athena/")
|
||||
#query = athena.query(
|
||||
#"""SELECT *
|
||||
#FROM trajectories.kitware
|
||||
|
|
@ -14,7 +14,8 @@ athena = m.Athena("hay", "s3://haystac-pmo-athena/")
|
|||
#""")
|
||||
query = athena.query("select count(*) as count from trajectories.kitware")
|
||||
data = query.results()
|
||||
pp.pprint(query.info()['Statistics'])
|
||||
pp.pprint(data.head(10))
|
||||
print(query.runtime)
|
||||
|
||||
# Everything *needs* to have a column in order for parquet to work, so scalar
|
||||
# values have to be assigned something, so here we use `as count` to create
|
||||
|
|
|
|||
9
test2.py
9
test2.py
|
|
@ -1,4 +1,4 @@
|
|||
import blueshift as b
|
||||
import minerva.blueshift as b
|
||||
import pprint
|
||||
|
||||
pp = pprint.PrettyPrinter(indent=4)
|
||||
|
|
@ -7,7 +7,8 @@ red = b.Redshift("hay", "s3://haystac-pmo-athena/",
|
|||
db="dev",
|
||||
cluster="redshift-cluster-1")
|
||||
query = red.query("select count(*) from myspectrum_schema.kitware")
|
||||
res = query.results()
|
||||
pp.pprint(res.head(10))
|
||||
pp.pprint(query.info())
|
||||
print(query)
|
||||
data = query.results()
|
||||
pp.pprint(data.head(10))
|
||||
print(query.runtime)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue