#!/usr/bin/perl ## Copyright (C) 2019 Marco Notaro ## ## License: Perl5 (https://dev.perl.org/licenses/) ## loading needed perl modules use strict; use warnings; use Pod::Usage; ## https://metacpan.org/pod/Pod::Usage use Getopt::Long; ## https://metacpan.org/pod/Getopt::Long use Spreadsheet::Read; ## https://metacpan.org/pod/Spreadsheet::Read use Time::HiRes qw(time); ## https://metacpan.org/pod/Time::HiRes ## perl help/manual my %opt=(); GetOptions(\%opt, 'help|?', 'man') or pod2usage(2); pod2usage(1) if $opt{help}; pod2usage(-exitval => 0, -verbose => 2) if $opt{man}; ## begin script my $start= time; ## input files my($oncoppigenes, $oncoppi, $outfile)= @ARGV; ## some checks if($#ARGV!=2){ die "Number of input arguments required: *3*.\nGet more info by calling help: ./build_oncoppi_ensemble.pl --help\n"; } if($ARGV[0]!~/.xlsx$|.xls$|.ods$|.csv$/){ die "Wrong file extension.\nExtesnion of first file must be one of the following: .xlsx|.xls|.ods|.csv.\nPlease see help\n"; } if($ARGV[1]!~/.xgmml$/){ die "Wrong file extension.\nExtension of the second file must be .xgmml.\nPlease see help\n"; } # reading input files my $spreadsheet= ReadData($oncoppigenes); my @rows= Spreadsheet::Read::rows($spreadsheet->[1]); ## step1: map gene-symbol_2_ensemble-geneID from xlsx file provided by oncoppi my %ensemble2genename= (); foreach my $i (3..scalar(@rows)){ ## the first 2 rows are header, so jump them my $genesymbol= $spreadsheet->[1]{cell}[2][$i]; my $ensembleid= $spreadsheet->[1]{cell}[4][$i]; $ensemble2genename{$genesymbol}=$ensembleid; } ## step2: print oncoppi network in tupla format by using ensemble-geneID as entry open IN, $oncoppi; open OUT, "> $outfile"; while(){ next if / print a brief help message =item B<--man (-m)> print the manual page =back =head1 ARGUMENTS =over 3 =item B file containing the mapping between identifiers (eg oncoppi.genes.xlsx) file format can be xlsx|xls|ods|csv (note: csv must be comma or semicolon separated); =item B file showing 
protein-protein interactions (PPIs) in the oncoppi network (e.g. oncoppi.cytoscape.xgmml); the file format must be xgmml

=item B<outfile>

name of the file to which the oncoppi network is written in tuple format (e.g. oncoppi.txt)

=back

=head1 DESCRIPTION

Build the oncoppi network in tuple format: p1 p2 score, where p1/p2 are ensemble gene IDs and the score is unitary.
To this end, the script parses the mapping file and the network file provided by the oncoppi paper (doi: 10.1038/ncomms14356).

=head1 COPYRIGHT LICENSE AND DISCLAIMER

Copyright (C) 2019 Marco Notaro, all rights reserved.
This program is free software; you can redistribute it and/or modify it under the terms of the
GNU General Public License version 3, GPLv3 (https://www.gnu.org/licenses/gpl-3.0.en.html).
This program is distributed in the hope that it will be useful, but without any warranty;
without even the implied warranty of merchantability or fitness for a particular purpose.

=head1 AUTHORS

Marco Notaro (https://marconotaro.github.io)

=cut

# yowza yowza yowza.