# Module-level S3 client, created once at import time; it is handed to
# receive_path() and pass_excel_to_csv() and used directly for get_object().
# Credentials come from ACCESS_ID / ACCESS_KEY, which are defined elsewhere.
s3_client = boto3.client(
    "s3",
    aws_access_key_id=ACCESS_ID,
    aws_secret_access_key=ACCESS_KEY,
)
"""Searches for the file format that matches one of the given headers and fetches its template information. Also builds a list with the main features of each column.
headers (list): contains the first X lines of the current file
collection (mongo collection): Mongo collection where the format templates are stored
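Returns: tuple of (format, delimiters, ottoMapping_columns, skip_rows) for the matched template
Raises: Exception('No format identified')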
95 for header
in headers:
96 template_format = collection.find_one({
"header": header})
98 print(
"FORMAT: {} \t VERSION: {} \t DELIMITER: {}".format(template_format[
"format"], template_format[
"version"], template_format[
"delimiters"]))
99 print(
"HEADER IDENTIFIED: {}".format(header))
100 ottoMapping_columns =
cols_otto(json.loads(template_format[
"schema"]), json.loads(template_format[
"ottoMapping"]))
101 return template_format[
"format"], template_format[
"delimiters"], ottoMapping_columns, skip_rows
103 raise Exception(
"No format identified")
"""Executes the full procedure that filters csv, txt and xls files
event (dict): dictionary with all client and sales information
context (none): required only for the Lambda execution
152 event = receive_path(event, s3_client)
153 print(
"Status={}".format(event[
"status"]))
155 not_identified = dict()
157 for files
in event[
"files"]:
159 file_db_id = files[
"file_id"]
161 file_extension = os.path.splitext(file)[1]
162 if "xls" in file_extension:
164 s3_file_obj = s3_client.get_object(Bucket=event[
"bucket"][0], Key=event[
"path"][0]+
'/'+file)
165 xls = ExcelFile(s3_file_obj[
'Body'].read())
166 pass_excel_to_csv(event, files, xls, s3_client)
167 except Exception
as e:
169 m =
"{}\n{}".format(sys.exc_info()[2], traceback.format_exc())
170 not_identified[file_db_id] = {
"file": file}
172 error = eh.handle(e, m, file_db_id)
174 print(
"Maybe there is a sheet not identified")
180 formats, delimiter, otto_cols, skip_rows =
identifyHeaders(csv_headers, collection)
181 if formats
not in csv_formats:
182 csv_formats[formats] = {
"files":[{
"file_id":file_db_id,
"file": file,
"delimiter": delimiter,
"skip_rows": skip_rows,
"encoding":encoding}],
"columns": otto_cols}
184 csv_list = csv_formats[formats][
"files"]
185 csv_list.append({
"file_id":file_db_id,
"file": file,
"delimiter": delimiter,
"skip_rows": skip_rows,
"encoding":encoding})
186 except Exception
as e:
188 m =
"{}\n{}".format(sys.exc_info()[2], traceback.format_exc())
192 error = eh.handle(e, m, file_db_id)
193 not_identified[file_db_id] = {
"file":file,
"error":error}
195 print(
"File {} not identified".format(file))
197 snap_collection.update_one({
"file_db_id":file_db_id},{
"$set":{
"status":
"identified"}})
198 event[
"status"] =
"OK"
199 event[
"format"] = csv_formats
200 event[
"no_format_identified"] = not_identified