r - Unique coordinates using GenomicRanges -


how can find unique (non overlapping genomic coordinates) comparing 2 datasets using genomicranges?

dataset1 =

chr start         end       cna 1   170900001   171500001   loss 1   11840001    19420001    loss 1   60300001    62700001    gain 1   25520001    25820001    gain 

dataset2 =

chr  start       end        cna 1   170940001   171500001   gain 1   60300001    62700001    gain 1   25520001    25840001    gain 1   119860001   123040001   loss 1   171500001   171580001   gain 1   79240001    84420001    gain 

expected output

     chr     start       end        cna      1     170940001 171500001  gain      1    119860001  123040001    loss      1    171500001  171580001    gain      1    79240001   84420001     gain 

try this:

require("genomicranges"        )  #data x1 <- read.table(text="chr start         end       cna 1   170900001   171500001   loss 1   11840001    19420001    loss 1   60300001    62700001    gain 1   25520001    25820001    gain",header=true)  x2 <- read.table(text="chr  start       end        cna 1   170940001   171500001   loss 1   60300001    62700001    gain 1   25520001    25840001    gain 1   119860001   123040001   loss 1   171500001   171580001   gain 1   79240001    84420001    gain",header=true)  g1 <-  granges(seqnames=paste0("chr",x1$chr),                iranges(start=x1$start,                        end=x1$end)                )  g2 <-  granges(seqnames=paste0("chr",x2$chr),                iranges(start=x2$start,                        end=x2$end)                )  #result setdiff(g1,g2)  #output # granges object 2 ranges , 0 metadata columns: #       seqnames                 ranges strand #          <rle>              <iranges>  <rle> #   [1]     chr1 [ 11840001,  19420001]      * #   [2]     chr1 [170900001, 170940000]      * #   ------- #   seqinfo: 1 sequence unspecified genome; no seqlengths 

edit: subset based on cna , setdiff, merge - c(). although, doesn't match expected output...

#result g1_loss <- setdiff(g1[g1@elementmetadata$cna=="loss"],                    g2[g2@elementmetadata$cna=="loss"]) g1_loss@elementmetadata$cna <- "loss"  g2_loss <- setdiff(g2[g2@elementmetadata$cna=="loss"],                    g1[g1@elementmetadata$cna=="loss"]) g2_loss@elementmetadata$cna <- "loss"  g1_gain <- setdiff(g1[g1@elementmetadata$cna=="gain"],                    g2[g2@elementmetadata$cna=="gain"]) g1_gain@elementmetadata$cna <- "gain"  g2_gain <- setdiff(g2[g2@elementmetadata$cna=="gain"],                    g1[g1@elementmetadata$cna=="gain"]) g2_gain@elementmetadata$cna <- "gain"  #merge c(g1_gain,g2_gain,g1_loss,g1_gain)  #output # granges object 5 ranges , 1 metadata column: #     seqnames                 ranges strand |         cna #        <rle>              <iranges>  <rle> | <character> # [1]     chr1 [ 25820002,  25840001]      * |        gain # [2]     chr1 [ 79240001,  84420001]      * |        gain # [3]     chr1 [170940001, 171580001]      * |        gain # [4]     chr1 [ 11840001,  19420001]      * |        loss # [5]     chr1 [170900001, 171500001]      * |        loss # ------- #   seqinfo: 1 sequence unspecified genome; no seqlengths 

Comments

Popular posts from this blog

css - SVG using textPath a symbol not rendering in Firefox -

Java 8 + Maven Javadoc plugin: Error fetching URL -

datatable - Matlab struct computations -