6 """Loads the file and applies some fixes so that it fits our standard and can be processed
9 filename (str): full S3 path of the file
10 features (dict): contains delimiter, skip_rows and encoding required for current file
11 session (boto3 obj): AWS client connection
12 rel_col (str): column name used to identify upc/release_id
13 Returns: df (pandas dataframe)
15 delimiter = features[
"delimiter"]
16 skip_rows = features[
"skip_rows"]
17 encoding = features[
"encoding"]
18 df = wr.s3.read_csv(filename, sep=delimiter, skiprows=skip_rows, encoding=encoding, dtype={rel_col:str}, low_memory=
False, boto3_session=session)
20 total_rows = df[
"End Date"][df[
"Start Date"]==
"Total_Rows"]
21 total_rows = int(total_rows)
22 df.drop(range(total_rows, df.shape[0], 1), axis=0, inplace=
True)
26 """Loads the file and applies some fixes so that it fits our standard and can be processed.
27 It is used for a specific iTunes version
30 filename (str): full S3 path of the file
31 features (dict): contains delimiter, skip_rows and encoding required for current file
32 session (boto3 obj): AWS client connection
33 rel_col (str): column name used to identify upc/release_id
34 Returns: df (pandas dataframe)
36 delimiter = features[
"delimiter"]
37 skip_rows = features[
"skip_rows"]
38 encoding = features[
"encoding"]
39 df = wr.s3.read_csv(filename, sep=delimiter, skiprows=skip_rows, encoding=encoding, dtype={rel_col:str}, low_memory=
False, boto3_session=session)
41 total_rows = df[
"Apple Identifier"][df[
"Storefront Name"]==
"Row Count"]
42 total_rows = int(total_rows)
43 df.drop(range(total_rows, df.shape[0], 1), axis=0, inplace=
True)