Glider
Loading...
Searching...
No Matches
ReceivePath.py
Go to the documentation of this file.
1import os
2import boto3
3import sys
4import traceback
5from os.path import join, dirname
6from dotenv import load_dotenv
7from pathlib import Path
8from ErrorHandler import *
9
10# dotenv_path = Path("src/importer/.env")
11load_dotenv()
12
def replace_file(bucket, path, file, s3):
    """Rename an S3 object so that blank spaces in its filename become underscores.

    S3 has no rename operation, so the object is copied to the new key and the
    original key is deleted afterwards.

    Args:
        bucket (str): bucket where the object is stored
        path (str): key prefix ("folder") of the object, without trailing slash
        file (str): current filename
        s3 (boto3 obj): AWS client connection
    Returns: new filename (str); `file` itself when it contains no spaces
    Raises: whatever `s3.copy_object` / `s3.delete_object` raise, e.g. when the
        source object does not exist
    """
    new_filename = file.replace(" ", "_")
    if new_filename == file:
        # Nothing to rename. Copying a key onto itself and then deleting the
        # source would either be rejected by S3 or destroy the object.
        return file
    print("File Renamed: {}".format(new_filename))
    source_key = path + "/" + file
    target_key = path + "/" + new_filename
    s3.copy_object(
        Bucket=bucket,
        CopySource={"Bucket": bucket, "Key": source_key},
        Key=target_key,
    )
    # Delete exactly the key that was copied (not the first hit of a prefix
    # listing), and only after the copy has succeeded.
    s3.delete_object(Bucket=bucket, Key=source_key)
    return new_filename
31
def receive_path(event, s3_client):
    """Check that every requested file exists in S3 and has a supported extension.

    Files that are unsupported, missing, or fail while being renamed are
    reported through `ErrorHandler.handle` and removed from `event["files"]`.
    Filenames containing blank spaces are renamed in S3 (spaces become
    underscores) and the entry's "file" value is updated accordingly.

    Args:
        event (dict): request description. Reads `event["bucket"]` and
            `event["path"]` (index 0 = input, index 1 = output) and
            `event["files"]`, a list of dicts with "file" and "file_id" keys.
            The dict is modified in place.
        s3_client (boto3 obj): AWS client connection
    Returns: (dict) the same `event`, with "files" filtered and "status" set
    Raises: Exception if nothing is listed under the input path
    """
    bucket_in = event["bucket"][0]
    path_in = event["path"][0]
    bucket_out = event["bucket"][1]
    path_out = event["path"][1]
    response_in = s3_client.list_objects(Bucket=bucket_in, Prefix=path_in)
    response_out = s3_client.list_objects(Bucket=bucket_out, Prefix=path_out)
    # Verifies that the input path exists
    if "Contents" not in response_in:
        raise Exception("Path invalid.")
    # Create the output folder marker if nothing exists under the output path
    if "Contents" not in response_out:
        s3_client.put_object(Bucket=bucket_out, Key=(path_out + "/"))
    possible_files = (".csv", ".tsv", ".xls", ".xlsx", ".txt", ".xml")
    status = "OK"
    # Iterate over a snapshot: entries are removed from event["files"] inside
    # the loop, and removing from the list being iterated would silently skip
    # the entry that follows each discarded file.
    for file in list(event["files"]):
        try:
            file_extension = os.path.splitext(file["file"])[1]
            if file_extension not in possible_files:
                print("FILE {} NOT SUPPORTED.".format(file["file"]))
                raise FileNotSupported()
            # head_object raises when the key does not exist
            s3_client.head_object(Bucket=bucket_in, Key=path_in + "/" + file["file"])
            if " " in file["file"]:
                file["file"] = replace_file(bucket_in, path_in, file["file"], s3_client)
        except Exception as e:
            print("File {} will be discarded due {}".format(file["file"], e))
            m = "{}\n{}".format(sys.exc_info()[2], traceback.format_exc())
            file_db_id = file["file_id"]
            event["files"].remove(file)
            eh = ErrorHandler()
            eh.handle(e, m, file_db_id)
            status = "Warning. Some files can not be found"
    event["status"] = status
    return event
74 # return {
75 # "status": status,
76 # "tag": event["tag"],
77 # "cat_gen": event["cat_gen"],
78 # "cat_match": event["cat_match"],
79 # "bucket": event['bucket'],
80 # "path": event['path'],
81 # "client_id": event["client_id"],
82 # "files": event["files"],
83 # 'currency': event['currency']
84 # }
replace_file(bucket, path, file, s3)
receive_path(event, s3_client)