Glider
Loading...
Searching...
No Matches
Validate.py
Go to the documentation of this file.
1import sys
2import traceback
3
4
# Module-level list shared by every delete_rows call: each call appends the
# index labels it matched to this list and returns this same list object.
5lines_to_drop = []
7 """
8 When data is processed, it reads all lines and verifies whether the data is complete.
9 """
def upc_validation(self, df, release_id):
    """Normalize a release_id value and report malformed ones.

    The value is converted to ``str``. Null-like values are replaced with
    ``"undefined"``. Any other value is returned unchanged; an error line is
    printed when its length is not 12 or 13 characters.

    Args:
        df (pandas dataframe): processed file loaded as dataframe. Not read
            by this method; kept so the signature stays unchanged.
        release_id: raw value taken from the release_id column.
    Returns:
        release_id (str): ``"undefined"`` for null-like input, otherwise the
        stringified input.
    """
    release_id = str(release_id)
    # NOTE: "na" is a substring test. It catches "nan" and "na", but also any
    # identifier that merely contains the letters "na".
    if release_id in ("", "0", "None") or "na" in release_id:
        return "undefined"
    # The previous check was `len < 12 and len >= 14`, which no length can
    # satisfy, so malformed ids were never reported. Flag everything outside
    # the 12-13 character range those bounds describe.
    if not 12 <= len(release_id) <= 13:
        print("Error in {}. release_id invalid".format(release_id))
    return release_id
33
def isrc_validation(self, df, isrc):
    """Return the isrc value converted to ``str``.

    No substitution or rejection takes place: null-like values (``""``,
    ``"nan"``, ``"None"``) and values whose length is not 12 are returned
    exactly as their string form. The earlier branches that tested for those
    cases only executed ``pass``, so they have been removed.

    Args:
        df (pandas dataframe): processed file loaded as dataframe. Not read
            by this method; kept so the signature stays unchanged.
        isrc: raw value taken from the isrc_id column.
    Returns:
        isrc (str): ``str(isrc)``.
    """
    return str(isrc)
53
def territory_validation(self, df, territory):
    """Normalize a territory_code value.

    The value is converted to ``str``. Placeholder or null-like values are
    replaced with ``"XX"``. Any other value is returned unchanged; an error
    line is printed when it is not exactly two characters long.

    Args:
        df (pandas dataframe): processed file loaded as dataframe. Not read
            by this method; kept so the signature stays unchanged.
        territory: raw value taken from the territory_code column.
    Returns:
        territory (str): ``"XX"`` for missing input, otherwise the
        stringified input.
    """
    territory = str(territory)
    # "None" is what str() yields for a missing (None) cell. It is treated as
    # missing here, like "nan", instead of being reported as an invalid code.
    if territory in ("-", "N/A", "", "nan", "None"):
        return "XX"
    if len(territory) != 2:
        print("Error in {}. territory invalid".format(territory))
    return territory
71
def delete_rows(self, df, col, value):
    """Record the index labels of rows whose ``col`` equals ``value``.

    Nothing is removed from ``df`` here. The matching index labels are
    appended to the module-level ``lines_to_drop`` list, which therefore
    keeps growing across calls.

    Args:
        df (pandas dataframe): processed file loaded as dataframe
        col (str): column name
        value (str): rows whose ``col`` equals this value are recorded
    Returns: lines_to_drop (list): the shared module-level list, after the
        new labels were appended.
    """
    matching_rows = df[df[col] == value]
    lines_to_drop.extend(list(matching_rows.index))
    return lines_to_drop
85
86 # def validate_columns(self, df, columns_type):
87 # """ executes de data validation
88
89 # Args:
90 # df (pandas dataframe): processed file loaded as dataframe
91 # columns_type (str): column name
92 # value (str): determines which lines should be deleted
93 # Returns: lines_to_drop (list)
94 # """
95 # print("DATA BEFORE CLEANING: {}".format(df.shape))
96 # null_values = list(df.columns[df.isnull().any()])
97 # # print(null_values)
98 # for col in null_values:
99 # try:
100 # if list(filter(lambda item: item["value"].lower() == col and item["flagNull"] == "false", columns_type)):
101 # df = df.dropna(subset=[col])
102 # except Exception:
103 # continue
104 # df.reset_index(inplace=True)
105 # print("DATA AFTER CLEANING: {}".format(df.shape))
106 # return df
107
def validation(self, df):
    """Execute the data validation on the three identifier columns.

    ``release_id``, ``isrc_id`` and ``territory_code`` are each rewritten in
    place with the result of the matching ``*_validation`` method. Before
    that, a trailing ``".0"`` is stripped from every release_id.

    Args:
        df (pandas dataframe): processed file loaded as dataframe; it is
            modified in place.
    Returns: df (pandas dataframe): the same dataframe, or ``None`` when any
                 step raised.
             status (str): ``"Ok"`` or ``"Validation Failed"``.
    """

    def _strip_float_suffix(value):
        # Only a trailing ".0" is removed. A plain replace(".0", "") would
        # also eat a ".0" in the middle of the value ("1234.056" -> "123456").
        text = str(value)
        return text[:-2] if text.endswith(".0") else text

    try:
        df["release_id"] = df["release_id"].apply(_strip_float_suffix)
        # Column-wise apply visits the same values as a row-wise
        # df.apply(axis=1), but still returns a Series on an empty dataframe.
        df["release_id"] = df["release_id"].apply(
            lambda value: self.upc_validation(df, value)
        )
        df["isrc_id"] = df["isrc_id"].apply(
            lambda value: self.isrc_validation(df, value)
        )
        df["territory_code"] = df["territory_code"].apply(
            lambda value: self.territory_validation(df, value)
        )
        return df, "Ok"
    except Exception:
        # Best-effort boundary: report the failure and signal it via status.
        print(sys.exc_info()[2])
        print(traceback.format_exc())
        return None, "Validation Failed"
territory_validation(self, df, territory)
Definition Validate.py:54
delete_rows(self, df, col, value)
Definition Validate.py:72
upc_validation(self, df, release_id)
Definition Validate.py:10