@@ -63,20 +63,31 @@ def parse_arguments(args=None):
6363 return parser
6464
6565
66- def filter_scores_target_list (pScoresDictionary , pTargetList = None , pTargetIntervalTree = None , pTargetFile = None ):
66+ def filter_scores_target_list (pScoresDictionary , pTargetFType , pTargetPosDict , pTargetList = None , pTargetIntervalTree = None , pTargetFile = None ):
6767
6868 accepted_scores = {}
6969 same_target_dict = {}
7070 target_regions_intervaltree = None
71- if pTargetList is not None :
72-
71+ # newly added
72+ if pTargetFType == 'hdf5' :
7373 # read hdf content for this specific combination
7474 targetFileHDF5Object = h5py .File (pTargetFile , 'r' )
7575 target_object = targetFileHDF5Object ['/' .join (pTargetList )]
7676 chromosome = target_object .get ('chromosome' )[()].decode ("utf-8" )
7777 start_list = list (target_object ['start_list' ][:])
7878 end_list = list (target_object ['end_list' ][:])
7979 targetFileHDF5Object .close ()
80+ elif pTargetFType == 'bed4' :
81+ chromosome = pTargetPosDict [pTargetList [- 1 ]]['chromosome' ]
82+ start_list = pTargetPosDict [pTargetList [- 1 ]]['start_list' ]
83+ end_list = pTargetPosDict [pTargetList [- 1 ]]['end_list' ]
84+ elif pTargetFType == 'bed3' :
85+ target_regions_intervaltree = pTargetIntervalTree
86+ else :
87+ log .error ('No target list given.' )
88+ raise Exception ('No target list given.' )
89+
90+ if pTargetList is not None :
8091 chromosome = [chromosome ] * len (start_list )
8192
8293 target_regions = list (zip (chromosome , start_list , end_list ))
@@ -85,12 +96,6 @@ def filter_scores_target_list(pScoresDictionary, pTargetList=None, pTargetInterv
8596
8697 hicmatrix = hm .hiCMatrix ()
8798 target_regions_intervaltree = hicmatrix .intervalListToIntervalTree (target_regions )[0 ]
88- elif pTargetIntervalTree is not None :
89- target_regions_intervaltree = pTargetIntervalTree
90-
91- else :
92- log .error ('No target list given.' )
93- raise Exception ('No target list given.' )
9499
95100 for key in pScoresDictionary :
96101 chromosome = pScoresDictionary [key ][0 ]
@@ -193,12 +198,12 @@ def writeAggregateHDF(pOutFileName, pOutfileNamesList, pAcceptedScoresList, pArg
193198 aggregateFileH5Object .close ()
194199
195200
196- def run_target_list_compilation (pInteractionFilesList , pTargetList , pArgs , pViewpointObj , pQueue = None , pOneTarget = False ):
201+ def run_target_list_compilation (pInteractionFilesList , pTargetList , pTargetFType , pTargetPosDict , pArgs , pViewpointObj , pQueue = None , pOneTarget = False ):
197202 outfile_names_list = []
198203 accepted_scores_list = []
199204 target_regions_intervaltree = None
200205 try :
201- if pOneTarget == True :
206+ if pTargetFType == 'bed3' :
202207 try :
203208 target_regions = utilities .readBed (pTargetList )
204209 except Exception as exp :
@@ -211,14 +216,13 @@ def run_target_list_compilation(pInteractionFilesList, pTargetList, pArgs, pView
211216 outfile_names_list_intern = []
212217 accepted_scores_list_intern = []
213218 for sample in interactionFile :
214-
215219 interaction_data , interaction_file_data , _ = pViewpointObj .readInteractionFile (pArgs .interactionFile , sample )
216220 if pOneTarget == True :
217221 target_file = None
218222 else :
219223 target_file = pTargetList [i ]
220224
221- accepted_scores = filter_scores_target_list (interaction_file_data , pTargetList = target_file , pTargetIntervalTree = target_regions_intervaltree , pTargetFile = pArgs .targetFile )
225+ accepted_scores = filter_scores_target_list (interaction_file_data , pTargetFType , pTargetPosDict , pTargetList = target_file , pTargetIntervalTree = target_regions_intervaltree , pTargetFile = pArgs .targetFile )
222226
223227 outfile_names_list_intern .append (sample )
224228 accepted_scores_list_intern .append (accepted_scores )
@@ -238,7 +242,7 @@ def run_target_list_compilation(pInteractionFilesList, pTargetList, pArgs, pView
238242 return
239243
240244
241- def call_multi_core (pInteractionFilesList , pTargetFileList , pFunctionName , pArgs , pViewpointObj ):
245+ def call_multi_core (pInteractionFilesList , pTargetFileList , pTargetFType , pTargetPosDict , pFunctionName , pArgs , pViewpointObj ):
242246 if len (pInteractionFilesList ) < pArgs .threads :
243247 pArgs .threads = len (pInteractionFilesList )
244248 outfile_names_list = [None ] * pArgs .threads
@@ -272,6 +276,8 @@ def call_multi_core(pInteractionFilesList, pTargetFileList, pFunctionName, pArgs
272276 process [i ] = Process (target = pFunctionName , kwargs = dict (
273277 pInteractionFilesList = interactionFileListThread ,
274278 pTargetList = targetFileListThread ,
279+ pTargetFType = pTargetFType ,
280+ pTargetPosDict = pTargetPosDict ,
275281 pArgs = pArgs ,
276282 pViewpointObj = pViewpointObj ,
277283 pQueue = queue [i ],
@@ -318,16 +324,32 @@ def main(args=None):
318324
319325 targetList = []
320326 present_genes = {}
327+ target_ftype = ''
328+ targetPosDict = None
321329 # read hdf file
322330 interactionFileHDF5Object = h5py .File (args .interactionFile , 'r' )
323331 keys_interactionFile = list (interactionFileHDF5Object .keys ())
324332
325333 if h5py .is_hdf5 (args .targetFile ):
326-
327334 targetDict , present_genes = viewpointObj .readTargetHDFFile (args .targetFile )
335+ target_ftype = 'hdf5'
328336
329337 else :
330- targetList = [args .targetFile ]
338+ with open (args .targetFile ) as file :
339+ for line in file .readlines ():
340+ if line .startswith ('#' ):
341+ continue
342+ _line = line .strip ().split ('\t ' )
343+ break
344+ if len (_line ) == 4 :
345+ log .info ('Targets BED contains 4 columns, aggregating on column 4' )
346+ target_ftype = 'bed4'
347+ present_genes , targetDict , targetPosDict = utilities .readTargetBed (args .targetFile )
348+ elif len (_line ) == 3 :
349+ targetList = [args .targetFile ]
350+ target_ftype = 'bed3'
351+ else :
352+ log .error ('BED of targets list must have 3 or 4 columns' )
331353
332354 if len (keys_interactionFile ) > 1 :
333355
@@ -346,7 +368,7 @@ def main(args=None):
346368 geneList2 = sorted (list (matrix_obj2 [chromosome2 ].keys ()))
347369
348370 for gene1 , gene2 in zip (geneList1 , geneList2 ):
349- if h5py . is_hdf5 ( args . targetFile ) :
371+ if target_ftype != 'bed3' :
350372 if gene1 in present_genes [sample ][sample2 ]:
351373 interactionDict [gene1 ] = [[sample , chromosome1 , gene1 ], [sample2 , chromosome2 , gene2 ]]
352374 else :
@@ -356,7 +378,7 @@ def main(args=None):
356378
357379 interactionFileHDF5Object .close ()
358380
359- if h5py . is_hdf5 ( args . targetFile ) :
381+ if target_ftype != 'bed3' :
360382 key_outer_matrix = present_genes .keys ()
361383 for keys_outer in key_outer_matrix :
362384 keys_inner_matrix = present_genes [keys_outer ].keys ()
@@ -365,5 +387,5 @@ def main(args=None):
365387 interactionList .append (interactionDict [gene ])
366388 targetList .append (targetDict [gene ])
367389
368- outfile_names_list , accepted_scores_list = call_multi_core (interactionList , targetList , run_target_list_compilation , args , viewpointObj )
390+ outfile_names_list , accepted_scores_list = call_multi_core (interactionList , targetList , target_ftype , targetPosDict , run_target_list_compilation , args , viewpointObj )
369391 writeAggregateHDF (args .outFileName , outfile_names_list , accepted_scores_list , args )
0 commit comments