Flexible BEDPE generation

The first step is to create a viola.Vcf object.

In [1]: import viola

In [2]: vcf = viola.read_vcf('https://raw.githubusercontent.com/dermasugita/ViolaDocs/main/docs/html/_static/tutorial.manta.vcf', patient_name='manta1')
---------------------------------------------------------------------------
HTTPError                                 Traceback (most recent call last)
Cell In [2], line 1
----> 1 vcf = viola.read_vcf('https://raw.githubusercontent.com/dermasugita/ViolaDocs/main/docs/html/_static/tutorial.manta.vcf', patient_name='manta1')

File ~/checkouts/readthedocs.org/user_builds/viola-sv/checkouts/latest/src/viola/io/parser.py:569, in read_vcf(filepath_or_buffer, variant_caller, patient_name)
    567 # read vcf files using PyVcf package
    568 if isinstance(filepath_or_buffer, str) and is_url(filepath_or_buffer):
--> 569     b = StringIO(urllib.request.urlopen(filepath_or_buffer).read().decode('utf-8'))
    570     vcf_reader = vcf.Reader(b)
    571 elif isinstance(filepath_or_buffer, str):

File ~/.asdf/installs/python/3.9.13/lib/python3.9/urllib/request.py:214, in urlopen(url, data, timeout, cafile, capath, cadefault, context)
    212 else:
    213     opener = _opener
--> 214 return opener.open(url, data, timeout)

File ~/.asdf/installs/python/3.9.13/lib/python3.9/urllib/request.py:523, in OpenerDirector.open(self, fullurl, data, timeout)
    521 for processor in self.process_response.get(protocol, []):
    522     meth = getattr(processor, meth_name)
--> 523     response = meth(req, response)
    525 return response

File ~/.asdf/installs/python/3.9.13/lib/python3.9/urllib/request.py:632, in HTTPErrorProcessor.http_response(self, request, response)
    629 # According to RFC 2616, "2xx" code indicates that the client's
    630 # request was successfully received, understood, and accepted.
    631 if not (200 <= code < 300):
--> 632     response = self.parent.error(
    633         'http', request, response, code, msg, hdrs)
    635 return response

File ~/.asdf/installs/python/3.9.13/lib/python3.9/urllib/request.py:561, in OpenerDirector.error(self, proto, *args)
    559 if http_err:
    560     args = (dict, 'default', 'http_error_default') + orig_args
--> 561     return self._call_chain(*args)

File ~/.asdf/installs/python/3.9.13/lib/python3.9/urllib/request.py:494, in OpenerDirector._call_chain(self, chain, kind, meth_name, *args)
    492 for handler in handlers:
    493     func = getattr(handler, meth_name)
--> 494     result = func(*args)
    495     if result is not None:
    496         return result

File ~/.asdf/installs/python/3.9.13/lib/python3.9/urllib/request.py:641, in HTTPDefaultErrorHandler.http_error_default(self, req, fp, code, msg, hdrs)
    640 def http_error_default(self, req, fp, code, msg, hdrs):
--> 641     raise HTTPError(req.full_url, code, msg, hdrs, fp)

HTTPError: HTTP Error 404: Not Found

Then use the to_bedpe_like method to generate a BEDPE-formatted pandas DataFrame.

In [3]: vcf_bedpe_like = vcf.to_bedpe_like()

In [4]: print(vcf_bedpe_like)
  chrom1     start1       end1 chrom2     start2       end2     name score strand1 strand2
0   chr1   82550460   82550461   chr1   82554225   82554226    test1  None       +       -
1   chr1   22814216   22814217   chr1   92581131   92581132    test2  None       -       -
2   chr1   60567905   60567906   chr1   60675940   60675941    test3  None       +       -
3   chr1   69583189   69583190   chr1   69590947   69590948    test4  None       +       -
4  chr11  104534876  104534877  chr11  104536573  104536574    test5  None       +       -
5  chr11  111134696  111134697  chr17   26470494   26470495  test6_1  None       +       -
6  chr17   26470494   26470495  chr11  111134696  111134697  test6_2  None       -       +

If you want to add the “SVLEN” and “CIPOS” INFO fields as extra columns, pass their names via the custom_infonames argument:

In [5]: vcf_bedpe_like_with_info = vcf.to_bedpe_like(custom_infonames=['svlen', 'cipos'])

In [6]: print(vcf_bedpe_like_with_info)
  chrom1     start1       end1 chrom2     start2  ...  strand1 strand2   svlen_0 cipos_0 cipos_1
0   chr1   82550460   82550461   chr1   82554225  ...        +       -     -3764     -51      52
1   chr1   22814216   22814217   chr1   92581131  ...        -       -  69766915     -51      51
2   chr1   60567905   60567906   chr1   60675940  ...        +       -   -108034     -44      44
3   chr1   69583189   69583190   chr1   69590947  ...        +       -     -7757    -123     123
4  chr11  104534876  104534877  chr11  104536573  ...        +       -     -1696     -68      69
5  chr11  111134696  111134697  chr17   26470494  ...        +       -         0    -118     118
6  chr17   26470494   26470495  chr11  111134696  ...        -       +         0     -81      82

[7 rows x 13 columns]

To add the FORMAT fields as extra columns, set add_formats=True:

In [7]: vcf_bedpe_like_with_format = vcf.to_bedpe_like(add_formats=True)

In [8]: print(vcf_bedpe_like_with_format)
  chrom1     start1       end1 chrom2     start2  ...  sample1_N_SR_1 sample1_T_PR_0 sample1_T_PR_1 sample1_T_SR_0 sample1_T_SR_1
0   chr1   82550460   82550461   chr1   82554225  ...             0.0           43.0            4.0           15.0            3.0
1   chr1   22814216   22814217   chr1   92581131  ...             NaN           35.0            5.0            NaN            NaN
2   chr1   60567905   60567906   chr1   60675940  ...             NaN           44.0            6.0            NaN            NaN
3   chr1   69583189   69583190   chr1   69590947  ...             NaN           20.0           12.0            NaN            NaN
4  chr11  104534876  104534877  chr11  104536573  ...             NaN           57.0           14.0            NaN            NaN
5  chr11  111134696  111134697  chr17   26470494  ...             NaN           45.0            5.0            NaN            NaN
6  chr17   26470494   26470495  chr11  111134696  ...             NaN           45.0            5.0            NaN            NaN

[7 rows x 18 columns]

Likewise, to add the FILTER values as extra columns, set add_filters=True:

In [9]: vcf_bedpe_like_with_filter = vcf.to_bedpe_like(add_filters=True)

In [10]: print(vcf_bedpe_like_with_filter)
  chrom1     start1       end1 chrom2     start2  ...  score strand1 strand2 MinSomaticScore   PASS
0   chr1   82550460   82550461   chr1   82554225  ...   None       +       -            True  False
1   chr1   22814216   22814217   chr1   92581131  ...   None       -       -            True  False
2   chr1   60567905   60567906   chr1   60675940  ...   None       +       -            True  False
3   chr1   69583189   69583190   chr1   69590947  ...   None       +       -           False   True
4  chr11  104534876  104534877  chr11  104536573  ...   None       +       -           False   True
5  chr11  111134696  111134697  chr17   26470494  ...   None       +       -            True  False
6  chr17   26470494   26470495  chr11  111134696  ...   None       -       +            True  False

[7 rows x 12 columns]