fixed repartition

This commit is contained in:
Ari Brown 2024-09-24 10:48:47 -04:00
parent e3055333b3
commit 5e604f07f7
4 changed files with 10 additions and 8 deletions

View file

@@ -8,4 +8,4 @@ run:
- poetry config http-basic.gitlab gitlab-ci-token "$CI_JOB_TOKEN"
- poetry publish --repository gitlab
tags:
- dind
- autoscale

View file

@@ -38,7 +38,7 @@ def repartition(mach, agents):
# Prep the info for the docker container
variables = {"min_agent": min(agents),
"max_agent": max(agents),
"source": src_top_level,
"source": sorted_top,
"destination": dst_top_level,
"secondary_destination": None}
@@ -54,7 +54,7 @@ def repartition(mach, agents):
#####################################
# Prep the work
# Find out how many hours there are in the dataset
pool_size = 5
pool_size = 1
objs = s.m.s3.ls(src_top_level + "year=")
hours = set(["s3://" + '/'.join([o.bucket_name, *o.key.split("/")[0:-1]])
@@ -87,9 +87,6 @@ try:
# Second part: repartition
pool.run(repartition, data=groups)
import IPython
IPython.embed()
finally:
pool.terminate()

View file

@@ -2,13 +2,16 @@ import boto3
import minerva as m
class Minerva:
def __init__(self, profile=None):
def __init__(self, profile=None, region=None):
kwargs = {}
if profile:
kwargs["profile_name"] = profile
else:
kwargs["region_name"] = "us-east-1"
if region:
kwargs["region_name"] = region
self.session = boto3.session.Session(**kwargs)
self.s3 = m.S3(self)

View file

@@ -16,7 +16,7 @@ readme = "README.md"
minerva-console = "minerva.console:main"
[tool.poetry.dependencies]
python = ">3.9, <3.12"
python = ">3.9"
boto3 = "^1.34.0"
pyarrow = "^14.0.1"
joblib = "^1.1.0"
@@ -25,3 +25,5 @@ s3fs = ">2023.6.0"
mako = ">1.2.0"
dask = ">2023.11.0"
distributed = ">2023.11.0"
pandas = ">2.0.0"
numpy = ">1.26.0"