Source code for silvio.extensions.utils.transform

"""
Methods used in the Transform step of an ETL pipeline.
"""



def list_integer(SeqList):
    """Integer-encode a collection of nucleotide sequences.

    Every sequence is upper-cased and each character is replaced by its
    position in the alphabet ``'ACGT'`` (A=0, C=1, G=2, T=3).

    Args:
        SeqList: Iterable of strings made up of the letters A, C, G and T
            (upper or lower case).

    Returns:
        A list containing one list of integers per input sequence, in the
        same order as the input. An empty sequence yields an empty list.

    Raises:
        KeyError: If a sequence contains a character outside ``'ACGT'``
            after upper-casing.
    """
    alphabet = 'ACGT'
    char_to_int = {char: index for index, char in enumerate(alphabet)}
    return [
        [char_to_int[char] for char in sequence.upper()]
        for sequence in SeqList
    ]
def list_onehot(IntegerList):
    """One-hot encode integer-encoded sequences.

    Each integer is turned into a list of four entries that is all zeros
    except for a ``1`` at the position given by the integer.

    Args:
        IntegerList: Iterable of integer sequences, where each value is
            used as an index into a vector of length 4.

    Returns:
        A list with one entry per input sequence; each entry is a list of
        length-4 one-hot lists, one per integer in that sequence.

    Raises:
        IndexError: If a value is not a valid index for a length-4 list.
    """
    vector_size = 4
    encoded_sequences = []
    for integer_encoded in IntegerList:
        onehot_encoded = []
        for value in integer_encoded:
            # Build a fresh vector per value so the rows never share state.
            letter = [0] * vector_size
            # Index assignment (not an equality test) so out-of-range
            # values still raise IndexError.
            letter[value] = 1
            onehot_encoded.append(letter)
        encoded_sequences.append(onehot_encoded)
    return encoded_sequences