def replace_file(bucket, path, file, s3):
    """Rename an S3 object so its filename uses underscores instead of spaces.

    The object is copied to the new key and the old key is then deleted.

    NOTE(review): the ``def`` line and the ``return`` statement were missing
    from the excerpt this block was rebuilt from. The signature is inferred
    from the documented parameters and from the call
    ``replace_file(bucket_in, path_in, file["file"], s3_client)``; the return
    value is inferred from the documented "new filename" result. Confirm
    against the original file.

    Args:
        bucket (str): bucket where the file is stored.
        path (str): S3 prefix (no trailing slash) under which the file is stored.
        file (str): current filename.
        s3 (boto3 obj): AWS S3 client connection.

    Returns:
        str: the new filename, with every space replaced by an underscore.
    """
    new_filename = file.replace(" ", "_")
    if new_filename == file:
        # Nothing to rename. Copying the key onto itself and then deleting
        # it would, at best, fail and, at worst, remove the only copy.
        return new_filename

    print("File Renamed: {}".format(new_filename))

    # The same key is used as copy source and as delete target, so the object
    # that gets removed is always the one that was just copied.
    source_key = path + "/" + file
    target_key = path + "/" + new_filename
    s3.copy_object(
        Bucket=bucket,
        CopySource={"Bucket": bucket, "Key": source_key},
        Key=target_key,
    )
    s3.delete_object(Bucket=bucket, Key=source_key)
    return new_filename
def check_files(event, s3_client):
    """Check that the files listed in ``event`` exist in S3 and drop the ones that fail.

    NOTE(review): this block was rebuilt from an excerpt in which the ``def``
    line, the ``try:`` line and any ``return`` statement were missing. The
    name ``check_files`` is a placeholder, and returning ``event`` is an
    assumption (harmless for callers that ignore the result). Confirm both
    against the original file.

    Args:
        event (dict): dictionary with all client and sales information. This
            function reads ``event["bucket"]`` and ``event["path"]`` (index 0
            is the input location, index 1 the output location) and
            ``event["files"]``, a list of dicts with ``"file"`` and
            ``"file_id"`` keys.
        s3_client (boto3 obj): AWS S3 client connection.

    Returns:
        dict: the same ``event`` object, modified in place. Files that failed
        are removed from ``event["files"]``, and ``event["status"]`` is set to
        a warning when at least one file failed.

    Raises:
        Exception: ``"Path invalid."`` when nothing is listed under the input
            prefix.
    """
    bucket_in = event["bucket"][0]
    path_in = event["path"][0]
    bucket_out = event["bucket"][1]
    path_out = event["path"][1]

    response_in = s3_client.list_objects(Bucket=bucket_in, Prefix=path_in)
    response_out = s3_client.list_objects(Bucket=bucket_out, Prefix=path_out)

    if "Contents" not in response_in:
        raise Exception("Path invalid.")

    if "Contents" not in response_out:
        # Create the output "folder" marker when nothing exists under it yet.
        s3_client.put_object(Bucket=bucket_out, Key=(path_out + "/"))

    possible_files = (".csv", ".tsv", ".xls", ".xlsx", ".txt", ".xml")

    # Iterate over a snapshot: failing entries are removed from
    # event["files"] below, and removing from a list while iterating over it
    # makes the loop skip the element that follows each removal.
    for entry in list(event["files"]):
        file_extension = os.path.splitext(entry["file"])[1]
        if file_extension not in possible_files:
            print("FILE {} NOT SUPPORTED.".format(entry["file"]))
            # NOTE(review): the excerpt only shows this log line for
            # unsupported extensions; whether such files should also be
            # skipped or removed could not be determined - confirm.

        try:
            # head_object raises when the key does not exist.
            s3_client.head_object(
                Bucket=bucket_in, Key=path_in + "/" + entry["file"]
            )
            if " " in entry["file"]:
                entry["file"] = replace_file(
                    bucket_in, path_in, entry["file"], s3_client
                )
        except Exception as e:
            print("File {} will be discarded due {}".format(entry["file"], e))
            m = "{}\n{}".format(sys.exc_info()[2], traceback.format_exc())
            file_db_id = entry["file_id"]
            event["files"].remove(entry)

            eh.handle(e, m, file_db_id)
            event["status"] = "Warning. Some files can not be found"

    return event