|
@@ -0,0 +1,109 @@
|
|
|
+import pandas as pd
|
|
|
+
|
|
|
def add_zeros_zipcodes(list):
    '''
    Normalize zipcode strings to five characters by left-padding with zeros.

    Every entry is cut at its first '.0' (the residue of a zipcode that was
    stored as a float, e.g. '1234.0') and then padded with leading '0' up to
    a length of five. The list is modified in place and also returned.

    :param list: mutable sequence of zipcode strings. The name shadows the
                 builtin; it is kept so existing keyword callers still work.
    :return: the same list object, with every entry normalized
    :raises AssertionError: if an entry is longer than five characters once
                            the '.0' part has been removed
    '''
    zipcodes = list  # local alias so the body does not lean on the shadowed builtin name
    for position, original in enumerate(zipcodes):
        # partition() returns the whole string when '.0' is absent, so this
        # handles short values such as '123.0' as well as long ones.
        digits = original.partition('.0')[0]
        if len(digits) > 5:
            # Raised explicitly (not via `assert`) so the check survives `python -O`;
            # AssertionError is kept because that is what callers already see.
            raise AssertionError(
                "zipcode {} contains more than 5 digits".format(original)
            )
        zipcodes[position] = digits.rjust(5, '0')
    return list
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
class Read_Table(object):
    '''
    Small wrapper around the pandas readers for one table file.

    The methods are layered: the "low level" ones return a DataFrame, the
    "mid level" ones return column titles / column values, and the
    "high level" one combines both into a dict. Only type 'xlsx' is
    implemented above the low level; 'ods' and 'csv' raise NotImplementedError.
    '''

    def __init__(self, path, axes=None, encoding='utf-8', seperation=';', type='xlsx', name=''):
        '''
        :param path: location of the table file; handed to pandas unchanged
        :param axes: stored on the instance; not read by any method of this
                     class. A new empty list is used when omitted.
        :param encoding: text encoding used by read_table_csv_
        :param seperation: column separator used by read_table_csv_
        :param type: Currently Available csv, xlsx and ods
        :param name: stored on the instance; not read by any method of this class
        '''
        # A literal [] default would be one list shared by every instance.
        self.axes = [] if axes is None else axes
        self.path = path
        self.encoding = encoding
        self.seperation = seperation
        self.type = type
        self.name = name
        # Always defined: None (for any other type) lets pandas.read_excel
        # choose the engine itself instead of failing on a missing attribute.
        self.engine = {'xlsx': 'openpyxl', 'ods': 'odf'}.get(type)

    # low Level
    def read_table_excel_or_ods(self):
        '''Read self.path with pandas.read_excel using self.engine and return the DataFrame.'''
        return pd.read_excel(self.path, engine=self.engine)

    def read_table_SQL(self):
        '''Placeholder for an SQL reader; always raises NotImplementedError.'''
        raise NotImplementedError

    def read_table_csv_(self):
        '''Read self.path with pandas.read_csv using the stored separator and encoding.'''
        return pd.read_csv(self.path, sep=self.seperation, encoding=self.encoding)

    # mid Level
    def get_column_titles(self):
        '''
        Return the column titles of an xlsx table.

        :return: the `columns.values` array of the table for type 'xlsx';
                 None for a type that is not xlsx, ods or csv
        :raises NotImplementedError: for type 'ods' or 'csv'
        '''
        if self.type == 'xlsx':
            return self.read_table_excel_or_ods().columns.values
        if self.type in ('ods', 'csv'):
            raise NotImplementedError
        return None

    def get_values_from_columns(self, column_titles: list):
        '''
        Collect the non-missing values of each requested column.

        :param column_titles: iterable of column titles to look up
        :return: one list per title, in the order given, holding that column's
                 values with missing entries dropped
        :raises NotImplementedError: for type 'ods' or 'csv', when at least
                                     one title is given
        '''
        values = []
        table = None
        for title in column_titles:
            if self.type == 'xlsx':
                if table is None:
                    # Read the file once, on first use, rather than once per column.
                    table = self.read_table_excel_or_ods()
                values.append(list(table[title].dropna()))
            elif self.type in ('ods', 'csv'):
                raise NotImplementedError
        return values

    # high Level
    def table_to_dict(self):
        '''
        Return the xlsx table as a dict mapping column title to value list.

        :return: {title: non-missing values of that column} for type 'xlsx';
                 None for a type that is not xlsx, ods or csv
        :raises NotImplementedError: for type 'ods' or 'csv'
        '''
        if self.type == 'xlsx':
            column_titles = self.get_column_titles()
            values = self.get_values_from_columns(column_titles)
            return dict(zip(column_titles, values))
        if self.type in ('ods', 'csv'):
            raise NotImplementedError
        return None
|
|
|
+
|
|
|
class Read_unordered_Table(Read_Table):
    '''
    This Class is for Tables which do not have a column to value Struct

    Instead of treating the first row as titles for the values below it, the
    table is searched for key cells, and the cells that follow a key in the
    same column are taken as that key's values.
    '''

    def __init__(self, path, axes=None, encoding='utf-8', seperation=';', type='xlsx', name=''):
        '''
        Accepts the same arguments as Read_Table and forwards them unchanged,
        except that an omitted `axes` becomes a new empty list.
        '''
        # Resolved here so no instance ever receives a shared default list.
        Read_Table.__init__(
            self,
            path,
            axes=[] if axes is None else axes,
            encoding=encoding,
            seperation=seperation,
            type=type,
            name=name,
        )

    #High Level
    def get_values_after_key_as_dict(self, list_of_keys):
        '''
        Map each key found in the table to the values that follow it.

        Every value list returned by table_to_dict() is scanned for each key.
        On a match, the matched cell becomes the dict key and the remainder of
        that list (everything after the match) becomes its value. If a key
        matches more than once, the last match found wins.

        :param list_of_keys: iterable of cell values to search for
        :return: dict of matched cell -> list of the values after it; keys
                 that never match are absent
        '''
        columns = self.table_to_dict()
        found = {}
        # Outer loop over the keys keeps the result in list_of_keys order.
        for key in list_of_keys:
            for column_values in columns.values():
                for position, value in enumerate(column_values):
                    if value == key:
                        found[value] = column_values[position + 1:]
        return found
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|