0

I would like to subset the columns of a data frame in R into another data frame, based on the values in one specific row (identified by its value in the first column).

# Example input: ten Ensembl gene IDs (rows) and nine logFC columns.
df1 <- data.frame(
  Ensembl_ID = c(
    "ENSG00000000003", "ENSG00000000005", "ENSG00000000419",
    "ENSG00000000457", "ENSG00000000460", "ENSG00000000938",
    "ENSG00000000971", "ENSG00000001036", "ENSG00000001084",
    "ENSG00000001167"
  ),
  logFC.1 = c(
    0.147447019707984, -0.278643924528991, 0.00638502079233481,
    0.00248371473862579, 0.0591639590814736, -0.0892578080659792,
    -0.0139042150604349, 0.15210410748665, -0.0273174541997048,
    0.0373813166759115
  ),
  logFC.2 = c(
    0.14237211045168, -0.153847067952652, 0.00806519294435945,
    -0.0243298183425441, 0.0639184480028851, -0.0791126460573967,
    -0.0517704622015086, 0.100033161692714, 0.105136768894399,
    0.0509474174745926
  ),
  logFC.3 = c(
    0.0692402101693023, -0.212626837128185, 0.0665466667502187,
    0.0189664498456434, 0.073631371224761, -0.0642014520794086,
    0.0115060035255512, 0.104767159584613, 0.140378485980222,
    0.0814931176279395
  ),
  logFC.4 = c(
    0.175916688982428, -0.0606440302201137, 0.0862627141013101,
    0.105179938123113, 0.128866411791584, -0.0988927171791539,
    0.128758540724723, 0.0997656895899759, 0.345468063926355,
    0.130898388184307
  ),
  logFC.5 = c(
    0.144743421921328, 0.247159332221974, 0.0232237466183996,
    0.0800788300610377, 0.178887735169961, -0.0592727391427514,
    -0.0723099661837084, 0.0387715967173523, -0.0607793368610136,
    0.110464511693512
  ),
  logFC.6 = c(
    0.0848187321362019, -0.299283590551811, 0.0366788808661408,
    -0.00763280370062748, 0.0145148270035513, -0.0384916970002755,
    -0.0000335640771631606, 0.0851895375297912, -0.00364050261322463,
    0.0602143760128463
  ),
  logFC.7 = c(
    0.305256444042024, -0.274308408751318, 0.0977066795857243,
    -0.0265659018074027, 0.136348613124811, -0.0938364533000299,
    -0.143634179166262, 0.139913812601005, 0.268708965044232,
    0.133427360632365
  ),
  logFC.8 = c(
    0.12744808339884, -0.285015311267508, 0.0459140048745496,
    -0.00976012971218515, 0.13292412700208, -0.184687147498946,
    -0.0411558715447517, 0.165717944056239, 0.323358546432839,
    0.0502386767987279
  ),
  logFC.9 = c(
    0.286824598926274, 0.095530985319937, 0.101370835445593,
    0.0352336819150421, 0.0573659992830985, -0.0739779010955875,
    0.00466993628480923, 0.0486643748696862, 0.0322601740536419,
    0.0873158516027886
  )
)

I would like to select the columns whose value in the "ENSG00000000005" row is less than -0.1 (< -0.1), so the logFC.5 and logFC.9 columns will be excluded from the output.

# Expected result: the input data with logFC.5 and logFC.9 removed.
# NOTE(review): logFC.4 is kept here although its value in the
# "ENSG00000000005" row (-0.0606440302201137) is not below -0.1.
df1 <- data.frame(
  Ensembl_ID = c(
    "ENSG00000000003", "ENSG00000000005", "ENSG00000000419",
    "ENSG00000000457", "ENSG00000000460", "ENSG00000000938",
    "ENSG00000000971", "ENSG00000001036", "ENSG00000001084",
    "ENSG00000001167"
  ),
  logFC.1 = c(
    0.147447019707984, -0.278643924528991, 0.00638502079233481,
    0.00248371473862579, 0.0591639590814736, -0.0892578080659792,
    -0.0139042150604349, 0.15210410748665, -0.0273174541997048,
    0.0373813166759115
  ),
  logFC.2 = c(
    0.14237211045168, -0.153847067952652, 0.00806519294435945,
    -0.0243298183425441, 0.0639184480028851, -0.0791126460573967,
    -0.0517704622015086, 0.100033161692714, 0.105136768894399,
    0.0509474174745926
  ),
  logFC.3 = c(
    0.0692402101693023, -0.212626837128185, 0.0665466667502187,
    0.0189664498456434, 0.073631371224761, -0.0642014520794086,
    0.0115060035255512, 0.104767159584613, 0.140378485980222,
    0.0814931176279395
  ),
  logFC.4 = c(
    0.175916688982428, -0.0606440302201137, 0.0862627141013101,
    0.105179938123113, 0.128866411791584, -0.0988927171791539,
    0.128758540724723, 0.0997656895899759, 0.345468063926355,
    0.130898388184307
  ),
  logFC.6 = c(
    0.0848187321362019, -0.299283590551811, 0.0366788808661408,
    -0.00763280370062748, 0.0145148270035513, -0.0384916970002755,
    -0.0000335640771631606, 0.0851895375297912, -0.00364050261322463,
    0.0602143760128463
  ),
  logFC.7 = c(
    0.305256444042024, -0.274308408751318, 0.0977066795857243,
    -0.0265659018074027, 0.136348613124811, -0.0938364533000299,
    -0.143634179166262, 0.139913812601005, 0.268708965044232,
    0.133427360632365
  ),
  logFC.8 = c(
    0.12744808339884, -0.285015311267508, 0.0459140048745496,
    -0.00976012971218515, 0.13292412700208, -0.184687147498946,
    -0.0411558715447517, 0.165717944056239, 0.323358546432839,
    0.0502386767987279
  )
)

The expected output should be as shown above.

Any ideas?

Cheers!

2
  • I tried df[,df[df$Name=="Bill",-1 ]< -0.1], but it resulted in S1, S2, S5, and S6, so it is not the expected result. Commented Aug 12, 2021 at 9:30
  • @AbdurRohman Exactly it is not working. Commented Aug 12, 2021 at 9:32

1 Answer 1

2

With base R:

# Keep Ensembl_ID plus every logFC column whose value in the
# "ENSG00000000005" row is below -0.1.
row_idx <- match("ENSG00000000005", df1$Ensembl_ID)
stopifnot(!is.na(row_idx))  # fail loudly if the gene ID is absent
below_cutoff <- unlist(df1[row_idx, -1]) < -0.1
# which() ignores NA comparisons, and drop = FALSE keeps the result a data
# frame even when Ensembl_ID is the only column left.
df1[, c(1L, which(below_cutoff) + 1L), drop = FALSE]

gives,

       Ensembl_ID      logFC.1      logFC.2     logFC.3       logFC.6     logFC.7     logFC.8
1  ENSG00000000003  0.147447020  0.142372110  0.06924021  8.481873e-02  0.30525644  0.12744808
2  ENSG00000000005 -0.278643925 -0.153847068 -0.21262684 -2.992836e-01 -0.27430841 -0.28501531
3  ENSG00000000419  0.006385021  0.008065193  0.06654667  3.667888e-02  0.09770668  0.04591400
4  ENSG00000000457  0.002483715 -0.024329818  0.01896645 -7.632804e-03 -0.02656590 -0.00976013
5  ENSG00000000460  0.059163959  0.063918448  0.07363137  1.451483e-02  0.13634861  0.13292413
6  ENSG00000000938 -0.089257808 -0.079112646 -0.06420145 -3.849170e-02 -0.09383645 -0.18468715
7  ENSG00000000971 -0.013904215 -0.051770462  0.01150600 -3.356408e-05 -0.14363418 -0.04115587
8  ENSG00000001036  0.152104107  0.100033162  0.10476716  8.518954e-02  0.13991381  0.16571794
9  ENSG00000001084 -0.027317454  0.105136769  0.14037849 -3.640503e-03  0.26870897  0.32335855
10 ENSG00000001167  0.037381317  0.050947417  0.08149312  6.021438e-02  0.13342736  0.05023868

Note that logFC.4 should also be excluded, since its value in the ENSG00000000005 row (-0.06064403) is greater than -0.1.

Sign up to request clarification or add additional context in comments.

1 Comment

It is working now. Last time it was showing some problem. Thank you for your help dear.

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.