riboraptor.coherence.get_periodicity(values, input_is_stream=False)[source]¶Calculate periodicity with respect to a 1-0-0 signal.
| Parameters: |
|
|---|---|
| Returns: |
|
riboraptor.coherence.naive_periodicity(values, identify_peak=False)[source]¶Calculate periodicity in a naive manner
Take ratio of frame1 over avg(frame2+frame3) counts. By default the first value is treated as the first frame as well
| Parameters: |
|
|---|---|
| Returns: |
|
Utilities for read counting operations.
riboraptor.count.bam_to_bedgraph(bam, strand=u'both', end_type=u'5prime', saveto=None)[source]¶Create bedgraph from bam.
| Parameters: |
|
|---|---|
| Returns: |
|
riboraptor.count.bedgraph_to_bigwig(bedgraph, sizes, saveto, input_is_stream=False)[source]¶Convert bedgraph to bigwig.
| Parameters: |
|
|---|
riboraptor.count.collapse_gene_coverage_to_metagene(gene_coverages, target_length, outfile=None)[source]¶Collapse gene coverages to specific target length.
| Parameters: |
|
|---|---|
| Returns: |
|
riboraptor.count.count_feature_genewise(feature_bed, bam, force_strandedness=False, use_multiprocessing=False)[source]¶Count features genewise.
| Parameters: |
|
|---|---|
| Returns: |
|
riboraptor.count.count_reads_bed(bam, region_bed_f, saveto)[source]¶Count number of reads falling in each region.
| Parameters: |
|
|---|---|
| Returns: |
|
riboraptor.count.count_reads_in_features(feature_bed, bam, force_strandedness=False, use_multiprocessing=False)[source]¶Count reads overlapping features.
| Parameters: |
|
|---|
riboraptor.count.count_reads_per_gene(bw, bed, prefix=None, n_cores=16, collapse_intervals=True)[source]¶Count number of reads falling in each region.
| Parameters: |
|
|---|---|
| Returns: |
|
riboraptor.count.count_utr5_utr3_cds(bam, utr5_bed=None, cds_bed=None, utr3_bed=None, genome=None, force_strandedness=False, genewise=False, saveto=None, use_multiprocessing=False)[source]¶One shot counts over UTR5/UTR3/CDS.
| Parameters: |
|
|---|---|
| Returns: |
|
riboraptor.count.diff_region_enrichment(numerator, denominator, prefix)[source]¶Calculate enrichment of counts of one region over another.
| Parameters: |
|
|---|---|
| Returns: |
|
riboraptor.count.export_gene_coverages(bigwig, region_bed_f, saveto, offset_5p=60, offset_3p=0, ignore_tx_version=True)[source]¶Export all gene coverages.
| Parameters: |
|
|---|---|
| Returns: |
|
riboraptor.count.export_metagene_coverage(bigwig, region_bed_f, max_positions=None, saveto=None, offset_5p=60, offset_3p=0, ignore_tx_version=True)[source]¶Calculate metagene coverage.
| Parameters: |
|
|---|---|
| Returns: |
|
riboraptor.count.extract_uniq_mapping_reads(inbam, outbam)[source]¶Extract only uniquely mapping reads from a bam.
| Parameters: |
|
|---|
riboraptor.count.gene_coverage(gene_name, bed, bw, gene_group=None, offset_5p=0, offset_3p=0, collapse_intervals=True)[source]¶Get gene coverage.
| Parameters: |
|
|---|---|
| Returns: |
|
riboraptor.count.gene_coverage_sum(gene_name, bed, bw, collapse_intervals=True)[source]¶Keep track of only the sum
| Parameters: |
|
|---|
riboraptor.count.get_fasta_sequence(fasta, intervals)[source]¶Extract fasta sequence given a list of intervals.
| Parameters: |
|
|---|---|
| Returns: |
|
riboraptor.count.get_region_sizes(bed)[source]¶Get collapsed lengths of gene in bed.
| Parameters: |
|
|---|---|
| Returns: |
|
riboraptor.count.htseq_to_cpm(htseq_f, saveto=None)[source]¶Convert HTSeq counts to CPM.
| Parameters: |
|
|---|---|
| Returns: |
|
riboraptor.count.htseq_to_tpm(htseq_f, cds_bed_f, saveto=None)[source]¶Convert HTSeq counts to TPM.
| Parameters: |
|
|---|---|
| Returns: |
|
riboraptor.count.interval_coverage(bw, intervals)[source]¶Get coverage at custom intervals
| Parameters: |
|
|---|---|
| Returns: |
|
riboraptor.count.mapping_reads_summary(bam, prefix)[source]¶Count number of mapped reads.
| Parameters: |
|
|---|---|
| Returns: |
|
riboraptor.count.pickle_bed_file(bed, collapse_intervals=True)[source]¶Create a lookup pickle file for genewise CDS/UTR coordinates.
In order to prevent recalculating the coordinates that should be fetched for each gene’s CDS or UTR regions, they can be stored in a pickle file.
| Parameters: |
|
|---|
riboraptor.count.read_enrichment(read_lengths, enrichment_range=[28, 29, 30, 31, 32], input_is_stream=False, input_is_file=False)[source]¶Calculate read enrichment for a certain range of lengths
| Parameters: |
|
|---|---|
| Returns: |
|
riboraptor.count.read_htseq(htseq_f)[source]¶Read HTSeq file.
| Parameters: |
|
|---|---|
| Returns: |
|
Utilities to download data from NCBI SRA
riboraptor.download.run_download_sra_script(download_root_location=None, ascp_key_path=None, srp_id_file=None, srp_id_list=None)[source]¶Download data from SRA.
| Parameters: |
|
|---|
riboraptor.dtw.dtw(X, Y, metric=u'euclidean', ddtw=False, ddtw_order=1)[source]¶| Parameters: |
|
|---|
riboraptor.dtw.get_path(D)[source]¶Traceback path of minimum cost
Given accumulated cost matrix D, trace back the minimum cost path
| Parameters: |
|
|---|---|
| Returns: |
|
riboraptor.fasta.complete_gene_fasta(utr5_bed_f, cds_bed_f, utr3_bed_f, fasta_f, prefix)[source]¶Merge Utr5, CDS, UTR3 coordinates to get one fasta.
| Parameters: |
|
|---|
riboraptor.fasta.export_all_fasta(region_bed_f, chrom_sizes, fasta, prefix, offset_5p=60, offset_3p=0, ignore_tx_version=True)[source]¶Export all gene coverages.
| Parameters: |
|
|---|
riboraptor.fasta.export_fasta_from_bed(gene_name, bed, chrom_sizes, fasta_f, gene_group=None, offset_5p=0, offset_3p=0)[source]¶Extract fasta genewise given coordinates in bed file
| Parameters: |
|
|---|---|
| Returns: |
|
riboraptor.fasta.get_fasta_sequence(fasta_f, intervals)[source]¶Extract fasta sequence given a list of intervals.
| Parameters: |
|
|---|---|
| Returns: |
|
All functions that are not so useful, but still useful.
riboraptor.helpers.check_file_exists(filepath)[source]¶Check if file exists.
| Parameters: |
|
|---|
riboraptor.helpers.codon_to_anticodon(codon)[source]¶Codon to anticodon.
| Parameters: |
|
|---|
riboraptor.helpers.collapse_bed_intervals(intervals, chromosome_lengths=None, offset_5p=0, offset_3p=0)[source]¶Collapse intervals in a non-overlapping manner
NOTE / TODO: This function has a subtle bug: it will be offset by 1 position when the gene is on the negative strand. So essentially, if you have a CDS on the negative strand, the first position should be discarded. Similarly, for the last position in a gene on the + strand, you have an extra position at the end.
| Parameters: |
|
|---|---|
| Returns: |
|
riboraptor.helpers.create_ideal_periodic_signal(signal_length)[source]¶Create ideal ribo-seq signal.
| Parameters: |
|
|---|---|
| Returns: |
|
riboraptor.helpers.get_strandedness(filepath)[source]¶Parse output of infer_experiment.py from RSeQC to get strandedness.
| Parameters: |
|
|---|---|
| Returns: |
|
riboraptor.helpers.identify_peaks(coverage)[source]¶Given coverage array, find the site of maximum density
riboraptor.helpers.list_to_ranges(list_of_int)[source]¶Convert a list to a list of range object
| Parameters: |
|
|---|---|
| Returns: |
|
riboraptor.helpers.millify(n)[source]¶Convert integer to human readable format.
| Parameters: |
|
|---|---|
| Returns: |
|
riboraptor.helpers.pad_five_prime_or_truncate(some_list, offset_5p, target_len)[source]¶Pad first the 5prime end and then the 3prime end or truncate
| Parameters: |
|
|---|
riboraptor.helpers.pad_or_truncate(some_list, target_len)[source]¶Pad or truncate a list up to a given target length
| Parameters: |
|
|---|
riboraptor.helpers.parse_star_logs(infile, outfile=None)[source]¶Parse star logs into a dict
| Parameters: |
|
|---|---|
| Returns: |
|
riboraptor.helpers.r2(x, y)[source]¶Calculate Pearson correlation between two vectors.
| Parameters: |
|
|---|
riboraptor.helpers.round_to_nearest(x, base=5)[source]¶Round to nearest base.
| Parameters: |
|
|---|---|
| Returns: |
|
riboraptor.helpers.set_xrotation(ax, degrees)[source]¶Rotate labels on x-axis.
| Parameters: |
|
|---|
riboraptor.helpers.summarize_counters(samplewise_dict)[source]¶Summarize gene counts for a collection of samples.
| Parameters: |
|
|---|---|
| Returns: |
|
riboraptor.helpers.summary_stats_two_arrays_welch(old_mean_array, new_array, old_var_array=None, old_n_counter=None, carried_forward_observations=None)[source]¶Average two arrays using Welch’s method
| Parameters: |
|
|---|---|
| Returns: |
|
Plotting methods.
riboraptor.plotting.plot_featurewise_barplot(utr5_counts, cds_counts, utr3_counts, ax=None, saveto=None, **kwargs)[source]¶Plot barplots for 5’UTR/CDS/3’UTR counts.
| Parameters: |
|
|---|
riboraptor.plotting.plot_framewise_counts(counts, frames_to_plot=u'all', ax=None, title=None, millify_labels=False, position_range=None, saveto=None, ascii=False, input_is_stream=False, **kwargs)[source]¶Plot framewise distribution of reads.
| Parameters: |
|
|---|
riboraptor.plotting.plot_read_counts(counts, ax=None, marker=None, color=u'royalblue', title=None, label=None, millify_labels=False, identify_peak=True, saveto=None, position_range=None, ascii=False, input_is_stream=False, ylabel=u'Normalized RPF density', **kwargs)[source]¶Plot RPF density around start/stop codons.
| Parameters: |
|
|---|
riboraptor.plotting.plot_read_length_dist(read_lengths, ax=None, millify_labels=True, input_is_stream=False, title=None, saveto=None, ascii=False, **kwargs)[source]¶Plot read length distribution.
| Parameters: |
|
|---|
riboraptor.plotting.setup_axis(ax, axis=u'x', majorticks=5, minorticks=1, xrotation=45, yrotation=0)[source]¶Setup axes defaults
| Parameters: |
|
|---|
riboraptor.statistics.KDE(values)[source]¶Perform Univariate Kernel Density Estimation.
Wrapper utility around statsmodels for quick KDE. TODO: scikit-learn has a faster implementation (?)
| Parameters: |
|
|---|---|
| Returns: |
|
riboraptor.statistics.KS_test(a, b)[source]¶Perform KS test between a and b values
| Parameters: |
|
|---|---|
| Returns: |
|
riboraptor.wig.WigReader(wig_location)[source]¶Bases: object
Class for reading and querying wigfiles.
get_chromosomes¶Return list of chromosomes and their sizes as in the wig file.
| Returns: |
|
|---|