8 When data is processed, it reads all lines and verifies whether the data is complete or not
11 """ Sets 'undefined' value for null release_id
14 df (pandas dataframe): processed file loaded as dataframe
15 release_id (str): release_id column
19 release_id = str(release_id)
20 if (release_id ==
"")
or (
"na" in release_id)
or (release_id ==
"0")
or (release_id ==
"None"):
23 release_id =
"undefined"
25 elif len(release_id) < 12
and len(release_id) >= 14:
26 print(
"Error in {}. release_id invalid".format(release_id))
35 """ Sets 'undefined' value for null isrc_id
38 df (pandas dataframe): processed file loaded as dataframe
39 isrc (str): isrc_id column
44 if isrc ==
"" or isrc ==
"nan" or isrc ==
"None":
55 """ Sets 'undefined' value for null territory_code
58 df (pandas dataframe): processed file loaded as dataframe
59 territory (str): territory_code column
63 territory = str(territory)
64 if territory ==
"-" or territory ==
"N/A" or territory ==
"" or territory ==
"nan":
67 elif len(territory) != 2:
68 print(
"Error in {}. territory invalid".format(territory))
73 """ Deletes rows which don't satisfy the OTTO standard
76 df (pandas dataframe): processed file loaded as dataframe
77 col (str): column name
78 value (str): determines which lines should be deleted
79 Returns: lines_to_drop (list)
82 df_idx = list(df[df[col] == value].index)
83 lines_to_drop[len(lines_to_drop):] = df_idx
109 """ Executes the data validation
112 df (pandas dataframe): processed file loaded as dataframe
113 Returns: df (pandas dataframe)
117 df[
"release_id"] = df[
"release_id"].apply(
lambda x: str(x).replace(
".0",
""))
118 df[
"release_id"] = df.apply(
lambda Row: self.
upc_validation(df, Row[
"release_id"]), axis=1)
119 df[
"isrc_id"] = df.apply(
lambda Row: self.
isrc_validation(df, Row[
"isrc_id"]), axis=1)
120 df[
"territory_code"] = df.apply(
lambda Row: self.
territory_validation(df, Row[
"territory_code"]), axis=1)
127 print(sys.exc_info()[2])
128 print(traceback.format_exc())
129 status =
"Validation Failed"
isrc_validation(self, df, isrc)
territory_validation(self, df, territory)
delete_rows(self, df, col, value)
upc_validation(self, df, release_id)