"""
Methods used in the Transform step of an ETL pipeline.
"""
def list_integer(SeqList):
    """Integer-encode sequences written over the alphabet ``ACGT``.

    Each sequence is upper-cased, then every character is replaced by its
    position in ``'ACGT'`` (A=0, C=1, G=2, T=3).

    Args:
        SeqList: Iterable of sequence strings.

    Returns:
        A list with one inner list of integers per input sequence, in the
        same order as the input.

    Raises:
        KeyError: If a sequence contains a character other than A, C, G or T
            (compared case-insensitively).
    """
    alphabet = 'ACGT'
    char_to_int = {char: index for index, char in enumerate(alphabet)}
    # Upper-case first so that lower-case input maps to the same codes.
    return [[char_to_int[char] for char in seq.upper()] for seq in SeqList]
def list_onehot(IntegerList, num_classes=4):
    """One-hot encode lists of integer codes.

    Every integer ``value`` becomes a list of ``num_classes`` zeros with a
    single ``1`` at index ``value``.

    Args:
        IntegerList: Iterable of integer-code sequences.
        num_classes: Length of each one-hot vector. Defaults to 4.

    Returns:
        A list with one inner list per input sequence; each inner list holds
        one one-hot vector (a list of ints) per integer code.

    Raises:
        IndexError: If a code is negative or not smaller than ``num_classes``.
    """
    OneHotList = []
    for integer_encoded in IntegerList:
        onehot_encoded = []
        for value in integer_encoded:
            # A negative index would silently wrap around to the end of the
            # vector and encode the wrong class, so reject it explicitly.
            if value < 0:
                raise IndexError(
                    "one-hot index out of range: {!r}".format(value)
                )
            letter = [0] * num_classes
            letter[value] = 1
            onehot_encoded.append(letter)
        OneHotList.append(onehot_encoded)
    return OneHotList