diff --git a/minerva/athena.py b/minerva/athena.py index a39e942..b2ef466 100644 --- a/minerva/athena.py +++ b/minerva/athena.py @@ -153,12 +153,15 @@ class Query(Execute): def distribute_results(self, client): + import dask.dataframe as dd + import pandas as pd + if self.ds: return self.ds futures = [] for fn in self.manifest_files(): - df = pd.read_csv(fn) + df = pd.read_parquet(fn) future = client.scatter(df) futures.append(future) diff --git a/minerva/minerva.py b/minerva/minerva.py index 62e28e1..3a4095a 100644 --- a/minerva/minerva.py +++ b/minerva/minerva.py @@ -6,9 +6,15 @@ class Minerva: self.session = boto3.session.Session(profile_name=profile) self.s3 = m.S3(self) + def athena(self, *args, **kwargs): return m.Athena(self, *args, **kwargs) + def redshift(self, *args, **kwargs): return m.Redshift(self, *args, **kwargs) + + def pier(self, *args, **kwargs): + return m.Pier(self, *args, **kwargs) + diff --git a/minerva/pier.py b/minerva/pier.py index 2df1425..85f763d 100644 --- a/minerva/pier.py +++ b/minerva/pier.py @@ -4,17 +4,19 @@ import os import stat from minerva.machine import Machine +from minerva.cluster import Cluster # Used for interacting with AWS class Pier: def __init__(self, - profile = "default", + handler = None, subnet_id = None, sg_groups = [], key_pair = None, # (keypair name, keypair privkey pair) iam = None, ): - self.session = boto3.session.Session(profile_name=profile) + self.handler = handler + self.session = handler.session self.ec2 = self.session.client("ec2") self.subnet_id = subnet_id self.groups = sg_groups @@ -29,6 +31,7 @@ class Pier: else: self.make_key_pair() + def make_key_pair(self): self.key_pair_name = f"Minerva-{random.random()}" print(f"making keypair ({self.key_pair_name})") @@ -39,9 +42,11 @@ class Pier: f.write(self.key['KeyMaterial']) os.chmod(self.key_path, stat.S_IRUSR | stat.S_IWUSR) + def machine(self, **kwargs): return Machine(self, **kwargs) + def t3_med(self, num): return self.machine(ami = "ami-0a538467cc9da9bb2", instance_type = "t3.medium", @@ -49,3 +54,7 @@ class Pier: name = f"blah-{num}", variables = {"num": num}) + + def cluster(self, *args, **kwargs): + return Cluster(self, *args, **kwargs) + diff --git a/pyproject.toml b/pyproject.toml index b67fdf8..84068cd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "minerva" -version = "0.6.1" +version = "0.6.2" description = "Easier access to AWS Athena and Redshift" authors = [ "Ari Brown ",