added helpers for local files, loading templates, and an example for canceling queries

This commit is contained in:
Ari Brown 2024-01-25 11:10:50 -05:00
parent 5bd2218612
commit ae3173b510
6 changed files with 29 additions and 19 deletions

View file

@ -1,17 +1,15 @@
"""Example: cancel the Athena queries listed in a text file.

Usage: pass the path of a text file as the first command-line argument.
Every non-blank line of that file is printed and handed to
``athena.cancel()`` (presumably one query execution id per line --
confirm against ``Athena.cancel``).
"""
import minerva
import sys

# Fail with a usage hint instead of a bare IndexError when no path is given,
# and do so before any AWS handles are created.
if len(sys.argv) < 2:
    sys.exit(f"usage: {sys.argv[0]} QUERY_FILE")

m = minerva.Minerva("hay-te")
athena = m.athena("s3://haystac-te-athena/")

file = sys.argv[1]  # e.g. "/tmp/queries.txt"

with open(file, 'r') as f:
    # Stream the file line by line rather than reading it all up front.
    for raw in f:
        # Strip the newline (and stray whitespace / "\r") so blank or
        # whitespace-only lines are skipped and ids are passed clean.
        line = raw.strip()
        if not line:
            continue

        print(line)
        athena.cancel(line)

View file

@ -1,4 +1,4 @@
from .parallel import parallel_map
from .helpers import parallel_map, local, load_template
from .athena import Athena
from .redshift import Redshift
@ -18,6 +18,8 @@ __all__ = [
"Machine",
"Pier",
"Minerva",
"parallel_map"
"parallel_map",
"local",
"load_template"
]

View file

@ -8,21 +8,10 @@ import pyarrow.dataset
import pprint
import datetime
import dask.dataframe as dd
from minerva import parallel_map
from mako.template import Template
from minerva import parallel_map, local, load_template
# Module-level pretty-printer configured with a 4-space indent.
pp = pprint.PrettyPrinter(indent=4)
# Resolve fname against the directory that holds this module
def local(fname):
    """Return `fname` joined onto the absolute directory of this module's file."""
    module_dir = os.path.dirname(__file__)
    absolute_dir = os.path.abspath(module_dir)
    return os.path.join(absolute_dir, fname)
def load_sql(path, **params):
    """Read the file at `path` and render it as a Mako template.

    `params` are forwarded to `Template.render` as keyword arguments; the
    rendered result is returned.
    """
    with open(path, 'r') as handle:
        source = handle.read()

    template = Template(source)
    return template.render(**params)
class Athena:
def __init__(self, handler, output=None):
self.handler = handler

View file

@ -0,0 +1,5 @@
## Mako template for a CREATE TABLE ... AS statement.
## Placeholders substituted at render time:
##   dest   - name of the table to create
##   output - value for external_location; it is passed through Python's
##            repr() so it is emitted as a quoted literal (assumes a str).
##            NOTE(review): repr() switches to double quotes when the value
##            contains a single quote -- confirm that cannot happen here.
##   tables - text placed after AS (presumably the SELECT that populates
##            the table -- confirm with the caller)
create table ${dest}
with (format = 'PARQUET', external_location = ${repr(output)})
as
${tables}

View file

@ -1,5 +1,8 @@
import os
import inspect
import math
from joblib import Parallel, delayed
from mako.template import Template
# If you have a list of 100 elements and want to process it with 8 cores,
# it will split it into 8 chunks (7 chunks of 13, 1 chunk of 9). `func` is
@ -31,3 +34,15 @@ def parallel_map(func=None, data=None, cores=8):
# Flatten the nested lists
return [val for r in res for val in r]
# Get full path of fname, relative to the file that calls local()
def local(fname):
    """Return the path of `fname` inside the directory of the caller's file.

    The caller is the frame that invoked `local`, so a module can refer to
    files that live next to it (e.g. ``local("query.sql")``).

    Args:
        fname: File name (or relative path) to resolve.

    Returns:
        `fname` joined onto the absolute directory of the calling file.
    """
    # Index 0 is this function's own frame; index 1 is whoever called us.
    # context=0 skips loading source lines, which are not needed here.
    caller_file = inspect.stack(0)[1].filename

    # Join onto the caller's *directory* -- joining onto the file path itself
    # would produce ".../module.py/<fname>".
    caller_dir = os.path.dirname(os.path.abspath(caller_file))
    return os.path.join(caller_dir, fname)
def load_template(path, **params):
    """Read the file at `path` and render it as a Mako template.

    `params` are forwarded to `Template.render` as keyword arguments; the
    rendered result is returned.
    """
    with open(path, 'r') as handle:
        source = handle.read()

    template = Template(source)
    return template.render(**params)

View file

@ -1,6 +1,6 @@
[tool.poetry]
name = "minerva"
version = "0.7.0"
version = "0.7.1"
description = "Easier access to AWS Athena and Redshift"
authors = [
"Ari Brown <ari@airintech.com>",
@ -22,3 +22,4 @@ pyarrow = "^14.0.1"
joblib = "^1.1.0"
fabric = "^3.0.0"
s3fs = "2023.6.0"
mako = ">1.2.0"