r - Unique coordinates using GenomicRanges -
how can find unique (non overlapping genomic coordinates) comparing 2 datasets using genomicranges?
dataset1 =
chr start end cna 1 170900001 171500001 loss 1 11840001 19420001 loss 1 60300001 62700001 gain 1 25520001 25820001 gain
dataset2 =
chr start end cna 1 170940001 171500001 gain 1 60300001 62700001 gain 1 25520001 25840001 gain 1 119860001 123040001 loss 1 171500001 171580001 gain 1 79240001 84420001 gain
expected output
chr start end cna 1 170940001 171500001 gain 1 119860001 123040001 loss 1 171500001 171580001 gain 1 79240001 84420001 gain
try this:
require("genomicranges" ) #data x1 <- read.table(text="chr start end cna 1 170900001 171500001 loss 1 11840001 19420001 loss 1 60300001 62700001 gain 1 25520001 25820001 gain",header=true) x2 <- read.table(text="chr start end cna 1 170940001 171500001 loss 1 60300001 62700001 gain 1 25520001 25840001 gain 1 119860001 123040001 loss 1 171500001 171580001 gain 1 79240001 84420001 gain",header=true) g1 <- granges(seqnames=paste0("chr",x1$chr), iranges(start=x1$start, end=x1$end) ) g2 <- granges(seqnames=paste0("chr",x2$chr), iranges(start=x2$start, end=x2$end) ) #result setdiff(g1,g2) #output # granges object 2 ranges , 0 metadata columns: # seqnames ranges strand # <rle> <iranges> <rle> # [1] chr1 [ 11840001, 19420001] * # [2] chr1 [170900001, 170940000] * # ------- # seqinfo: 1 sequence unspecified genome; no seqlengths
edit: subset based on cna , setdiff, merge - c()
. although, doesn't match expected output...
#result g1_loss <- setdiff(g1[g1@elementmetadata$cna=="loss"], g2[g2@elementmetadata$cna=="loss"]) g1_loss@elementmetadata$cna <- "loss" g2_loss <- setdiff(g2[g2@elementmetadata$cna=="loss"], g1[g1@elementmetadata$cna=="loss"]) g2_loss@elementmetadata$cna <- "loss" g1_gain <- setdiff(g1[g1@elementmetadata$cna=="gain"], g2[g2@elementmetadata$cna=="gain"]) g1_gain@elementmetadata$cna <- "gain" g2_gain <- setdiff(g2[g2@elementmetadata$cna=="gain"], g1[g1@elementmetadata$cna=="gain"]) g2_gain@elementmetadata$cna <- "gain" #merge c(g1_gain,g2_gain,g1_loss,g1_gain) #output # granges object 5 ranges , 1 metadata column: # seqnames ranges strand | cna # <rle> <iranges> <rle> | <character> # [1] chr1 [ 25820002, 25840001] * | gain # [2] chr1 [ 79240001, 84420001] * | gain # [3] chr1 [170940001, 171580001] * | gain # [4] chr1 [ 11840001, 19420001] * | loss # [5] chr1 [170900001, 171500001] * | loss # ------- # seqinfo: 1 sequence unspecified genome; no seqlengths
Comments
Post a Comment