Glider
Loading...
Searching...
No Matches
Excel_to_csv.py
Go to the documentation of this file.
1import os
2from dotenv import load_dotenv
3from pandas import read_excel
4from MongoConnection import mongo_connection
5
6load_dotenv()
7
8def upload_info_mongo(event, path, file_id, filename):
9 """Upload file information from xls file to new CSV file
10
11 Args:
12 event (dict):
13 path (str): s3 path where file is storage
14 file_id (str): current/new file_id assigned for mongodb
15 filename (str): current file
16 Returns: Nothing
17 """
18 mongo_conn = mongo_connection()
19 # Mongo collection to upload snapshots
20 collection = mongo_conn.mongo_conn_snapshots()
21 new_file = {
22 "file_db_id": file_id,
23 "status": "processing",
24 "client_id": event["client_id"],
25 "name": filename,
26 "s3_path": path+"/"+filename
27 }
28 if collection.count_documents({"file_db_id": file_id,"name": filename}) >= 1:
29 collection.delete_many({"file_db_id": file_id, "name": filename})
30 collection.insert_one(new_file)
31 else:
32 collection.insert_one(new_file)
33
34def pass_excel_to_csv(event, files, xls, s3):
35 """Transform xsl file into csv file(s) according sheets number
36
37 Args:
38 event (dict): is a dictionary with all client and sales information
39 files (dict): is a dictionary with file information
40 xls (ExcelFile obj): file loaded as Excel file
41 s3 (boto3 client): AWS client connection
42 Returns: event (dict)
43 """
44 bucket = event["bucket"][0]
45 path = event["path"][0]
46 tmp_name = "/tmp/file.csv"
47 file_id = files["file_id"]
48 filename = files["file"]
49 filename = filename.split(".xls")[0]
50 count=1
51 for sheet in xls.sheet_names:
52 df = read_excel(xls, sheet_name=sheet, engine='openpyxl')
53 if len(df) > 0:
54 new_filename = filename+"_"+sheet.replace(" ", "_")+".csv"
55 print("Creating new csv file: {}".format(new_filename))
56 df.to_csv(tmp_name, index=False)
57 with open(tmp_name, 'rb') as f:
58 object_data = f.read()
59 s3.put_object(Body=object_data, Bucket=bucket, Key=path+"/"+new_filename)
60 upload_info_mongo(event, path, file_id+"-"+str(count), new_filename)
61 event["files"].append({"file_id":file_id+"-"+str(count), "file": new_filename, "root_id":files["file_id"]})
62 count += 1
63 else:
64 print("Sheet '{}' not created cause it is empty".format(sheet))
65
66 return event
67
upload_info_mongo(event, path, file_id, filename)
pass_excel_to_csv(event, files, xls, s3)