1

I'm a student who just started learning computer systems using C.

I have a question about how memory is allocated for string literals.

I tried to analyze how string literals are laid out in memory using the code below:

#include <stdio.h>

/*
 * Prints four one-character string literals, each followed by the address
 * the pointer holds, so the placement of the literals can be compared.
 *
 * The literals and format strings are deliberately written inline at every
 * call (not factored into a helper or a loop) so that the set and order of
 * string literals in the program stays exactly as in the experiment.
 *
 * Returns 0.
 */
int main(void){
   /* const: a string literal must not be modified through this pointer. */
   const char *p = "1";
   printf("%s\n", p);
   /* %p requires a void * argument, so the pointer is cast explicitly. */
   printf("%p\n", (void *)p);

   p = "2";
   printf("%s\n", p);
   printf("%p\n", (void *)p);

   p = "3";
   printf("%s\n", p);
   printf("%p\n", (void *)p);

   p = "4";
   printf("%s\n", p);
   printf("%p\n", (void *)p);

   return 0;
}

and when executed, the result appears like this:

1
0000000000404000
2
000000000040400A
3
000000000040400C
4
000000000040400E

According to my limited knowledge so far, the size of each of the above strings is 2 bytes (including the terminating \0), so the space allocated for each of them should also be 2 bytes.

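From string "2" to "4", the memory is allocated as I expected (each address is 2 bytes after the previous one), but "2" is placed 10 bytes after "1", so there is an unexplained gap of 8 bytes (not bits) between the end of "1" and the start of "2".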

Is there something allocated between those two strings? Or is there some reason for the system to allocate the string like this?

3
  • 1
    Your format strings "%s\n" and "%p\n" might be there. You could try assigning the format strings to pointers as well. Then you could look at all of your string addresses. Note that it's probably reusing the same storage for duplicate string literals. Commented Jun 3, 2021 at 5:14
  • hex dump (or notepad) the executable, you will probably see the string table Commented Jun 3, 2021 at 5:15
  • Thank you for the comments! I wasn't able to understand it thoroughly with only the comments, but now I can see the point of what you guys were trying to say from the below answer... Commented Jun 3, 2021 at 5:23

2 Answers 2

4

The spacing includes the printf format strings as well:

0000000000404000 "1\0"
0000000000404002 "%s\n\0"
0000000000404006 "%p\n\0"
000000000040400A "2\0"
000000000040400C "3\0"
000000000040400E "4\0"

Note that the strings appear in the same order as they appear in the source code, but the printf format strings are not duplicated for each call. Since the strings are identical, they only need to appear in the executable once.

Sign up to request clarification or add additional context in comments.

1 Comment

Thanks! I wasn't able to understand thoroughly from above comments... but the answer made me able to do it!
1

The size of the gap is not always 8 bytes, though; it depends on the OS and on the compiler and linker that built the executable.

We can disassemble the executable, section by section, to see which addresses the string literals were given.

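See the result from CentOS (an AArch64 build) below. In the <main> part, each `adrp x0, 400000` / `add x0, x0, #0x...` pair computes the address of one string: 0x4006e8 ("1"), 0x4006f8 ("2"), 0x400700 ("3") and 0x400708 ("4"), plus 0x4006f0 for the format string passed to printf. In this build each of these strings starts 8 bytes after the previous one, so "1" and "2" are 16 bytes apart with the format string in between.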

[root@localhost zz]# ./a.out 
1
0x4006e8
2
0x4006f8
3
0x400700
4
0x400708
[root@localhost zz]# objdump -d a.out

a.out:     file format elf64-littleaarch64


Disassembly of section .init:

0000000000400408 <_init>:
  400408:       a9bf7bfd        stp     x29, x30, [sp,#-16]!
  40040c:       910003fd        mov     x29, sp
  400410:       94000032        bl      4004d8 <call_weak_fn>
  400414:       a8c17bfd        ldp     x29, x30, [sp],#16
  400418:       d65f03c0        ret

Disassembly of section .plt:

0000000000400420 <.plt>:
  400420:       a9bf7bf0        stp     x16, x30, [sp,#-16]!
  400424:       f00000f0        adrp    x16, 41f000 <__FRAME_END__+0x1e8f4>
  400428:       f947fe11        ldr     x17, [x16,#4088]
  40042c:       913fe210        add     x16, x16, #0xff8
  400430:       d61f0220        br      x17
  400434:       d503201f        nop
  400438:       d503201f        nop
  40043c:       d503201f        nop

0000000000400440 <__libc_start_main@plt>:
  400440:       90000110        adrp    x16, 420000 <__libc_start_main@GLIBC_2.17>
  400444:       f9400211        ldr     x17, [x16]
  400448:       91000210        add     x16, x16, #0x0
  40044c:       d61f0220        br      x17

0000000000400450 <__gmon_start__@plt>:
  400450:       90000110        adrp    x16, 420000 <__libc_start_main@GLIBC_2.17>
  400454:       f9400611        ldr     x17, [x16,#8]
  400458:       91002210        add     x16, x16, #0x8
  40045c:       d61f0220        br      x17

0000000000400460 <abort@plt>:
  400460:       90000110        adrp    x16, 420000 <__libc_start_main@GLIBC_2.17>
  400464:       f9400a11        ldr     x17, [x16,#16]
  400468:       91004210        add     x16, x16, #0x10
  40046c:       d61f0220        br      x17

0000000000400470 <puts@plt>:
  400470:       90000110        adrp    x16, 420000 <__libc_start_main@GLIBC_2.17>
  400474:       f9400e11        ldr     x17, [x16,#24]
  400478:       91006210        add     x16, x16, #0x18
  40047c:       d61f0220        br      x17

0000000000400480 <printf@plt>:
  400480:       90000110        adrp    x16, 420000 <__libc_start_main@GLIBC_2.17>
  400484:       f9401211        ldr     x17, [x16,#32]
  400488:       91008210        add     x16, x16, #0x20
  40048c:       d61f0220        br      x17

Disassembly of section .text:

0000000000400490 <_start>:
  400490:       d280001d        mov     x29, #0x0                       // #0
  400494:       d280001e        mov     x30, #0x0                       // #0
  400498:       910003fd        mov     x29, sp
  40049c:       aa0003e5        mov     x5, x0
  4004a0:       f94003e1        ldr     x1, [sp]
  4004a4:       910023e2        add     x2, sp, #0x8
  4004a8:       910003e6        mov     x6, sp
  4004ac:       580000a0        ldr     x0, 4004c0 <_start+0x30>
  4004b0:       580000c3        ldr     x3, 4004c8 <_start+0x38>
  4004b4:       580000e4        ldr     x4, 4004d0 <_start+0x40>
  4004b8:       97ffffe2        bl      400440 <__libc_start_main@plt>
  4004bc:       97ffffe9        bl      400460 <abort@plt>
  4004c0:       00400594        .word   0x00400594
  4004c4:       00000000        .word   0x00000000
  4004c8:       00400638        .word   0x00400638
  4004cc:       00000000        .word   0x00000000
  4004d0:       004006b8        .word   0x004006b8
  4004d4:       00000000        .word   0x00000000

00000000004004d8 <call_weak_fn>:
  4004d8:       f00000e0        adrp    x0, 41f000 <__FRAME_END__+0x1e8f4>
  4004dc:       f947f000        ldr     x0, [x0,#4064]
  4004e0:       b4000040        cbz     x0, 4004e8 <call_weak_fn+0x10>
  4004e4:       17ffffdb        b       400450 <__gmon_start__@plt>
  4004e8:       d65f03c0        ret
  4004ec:       00000000        .inst   0x00000000 ; undefined

00000000004004f0 <deregister_tm_clones>:
  4004f0:       90000100        adrp    x0, 420000 <__libc_start_main@GLIBC_2.17>
  4004f4:       9100e000        add     x0, x0, #0x38
  4004f8:       90000101        adrp    x1, 420000 <__libc_start_main@GLIBC_2.17>
  4004fc:       9100e021        add     x1, x1, #0x38
  400500:       eb00003f        cmp     x1, x0
  400504:       540000a0        b.eq    400518 <deregister_tm_clones+0x28>
  400508:       90000001        adrp    x1, 400000 <_init-0x408>
  40050c:       f9436c21        ldr     x1, [x1,#1752]
  400510:       b4000041        cbz     x1, 400518 <deregister_tm_clones+0x28>
  400514:       d61f0020        br      x1
  400518:       d65f03c0        ret
  40051c:       d503201f        nop

0000000000400520 <register_tm_clones>:
  400520:       90000100        adrp    x0, 420000 <__libc_start_main@GLIBC_2.17>
  400524:       9100e000        add     x0, x0, #0x38
  400528:       90000101        adrp    x1, 420000 <__libc_start_main@GLIBC_2.17>
  40052c:       9100e021        add     x1, x1, #0x38
  400530:       cb000021        sub     x1, x1, x0
  400534:       9343fc21        asr     x1, x1, #3
  400538:       8b41fc21        add     x1, x1, x1, lsr #63
  40053c:       9341fc21        asr     x1, x1, #1
  400540:       b40000c1        cbz     x1, 400558 <register_tm_clones+0x38>
  400544:       90000002        adrp    x2, 400000 <_init-0x408>
  400548:       f9437042        ldr     x2, [x2,#1760]
  40054c:       b4000062        cbz     x2, 400558 <register_tm_clones+0x38>
  400550:       d61f0040        br      x2
  400554:       d503201f        nop
  400558:       d65f03c0        ret
  40055c:       d503201f        nop

0000000000400560 <__do_global_dtors_aux>:
  400560:       a9be7bfd        stp     x29, x30, [sp,#-32]!
  400564:       910003fd        mov     x29, sp
  400568:       f9000bf3        str     x19, [sp,#16]
  40056c:       90000113        adrp    x19, 420000 <__libc_start_main@GLIBC_2.17>
  400570:       3940e260        ldrb    w0, [x19,#56]
  400574:       35000080        cbnz    w0, 400584 <__do_global_dtors_aux+0x24>
  400578:       97ffffde        bl      4004f0 <deregister_tm_clones>
  40057c:       52800020        mov     w0, #0x1                        // #1
  400580:       3900e260        strb    w0, [x19,#56]
  400584:       f9400bf3        ldr     x19, [sp,#16]
  400588:       a8c27bfd        ldp     x29, x30, [sp],#32
  40058c:       d65f03c0        ret

0000000000400590 <frame_dummy>:
  400590:       17ffffe4        b       400520 <register_tm_clones>

0000000000400594 <main>:
  400594:       a9be7bfd        stp     x29, x30, [sp,#-32]!
  400598:       910003fd        mov     x29, sp
  40059c:       90000000        adrp    x0, 400000 <_init-0x408>
  4005a0:       911ba000        add     x0, x0, #0x6e8
  4005a4:       f9000fa0        str     x0, [x29,#24]
  4005a8:       f9400fa0        ldr     x0, [x29,#24]
  4005ac:       97ffffb1        bl      400470 <puts@plt>
  4005b0:       90000000        adrp    x0, 400000 <_init-0x408>
  4005b4:       911bc000        add     x0, x0, #0x6f0
  4005b8:       f9400fa1        ldr     x1, [x29,#24]
  4005bc:       97ffffb1        bl      400480 <printf@plt>
  4005c0:       90000000        adrp    x0, 400000 <_init-0x408>
  4005c4:       911be000        add     x0, x0, #0x6f8
  4005c8:       f9000fa0        str     x0, [x29,#24]
  4005cc:       f9400fa0        ldr     x0, [x29,#24]
  4005d0:       97ffffa8        bl      400470 <puts@plt>
  4005d4:       90000000        adrp    x0, 400000 <_init-0x408>
  4005d8:       911bc000        add     x0, x0, #0x6f0
  4005dc:       f9400fa1        ldr     x1, [x29,#24]
  4005e0:       97ffffa8        bl      400480 <printf@plt>
  4005e4:       90000000        adrp    x0, 400000 <_init-0x408>
  4005e8:       911c0000        add     x0, x0, #0x700
  4005ec:       f9000fa0        str     x0, [x29,#24]
  4005f0:       f9400fa0        ldr     x0, [x29,#24]
  4005f4:       97ffff9f        bl      400470 <puts@plt>
  4005f8:       90000000        adrp    x0, 400000 <_init-0x408>
  4005fc:       911bc000        add     x0, x0, #0x6f0
  400600:       f9400fa1        ldr     x1, [x29,#24]
  400604:       97ffff9f        bl      400480 <printf@plt>
  400608:       90000000        adrp    x0, 400000 <_init-0x408>
  40060c:       911c2000        add     x0, x0, #0x708
  400610:       f9000fa0        str     x0, [x29,#24]
  400614:       f9400fa0        ldr     x0, [x29,#24]
  400618:       97ffff96        bl      400470 <puts@plt>
  40061c:       90000000        adrp    x0, 400000 <_init-0x408>
  400620:       911bc000        add     x0, x0, #0x6f0
  400624:       f9400fa1        ldr     x1, [x29,#24]
  400628:       97ffff96        bl      400480 <printf@plt>
  40062c:       52800000        mov     w0, #0x0                        // #0
  400630:       a8c27bfd        ldp     x29, x30, [sp],#32
  400634:       d65f03c0        ret

0000000000400638 <__libc_csu_init>:
  400638:       a9bc7bfd        stp     x29, x30, [sp,#-64]!
  40063c:       910003fd        mov     x29, sp
  400640:       a901d7f4        stp     x20, x21, [sp,#24]
  400644:       f00000f4        adrp    x20, 41f000 <__FRAME_END__+0x1e8f4>
  400648:       f00000f5        adrp    x21, 41f000 <__FRAME_END__+0x1e8f4>
  40064c:       91380294        add     x20, x20, #0xe00
  400650:       9137e2b5        add     x21, x21, #0xdf8
  400654:       a902dff6        stp     x22, x23, [sp,#40]
  400658:       cb150294        sub     x20, x20, x21
  40065c:       f9001ff8        str     x24, [sp,#56]
  400660:       9343fe94        asr     x20, x20, #3
  400664:       2a0003f6        mov     w22, w0
  400668:       aa0103f7        mov     x23, x1
  40066c:       aa0203f8        mov     x24, x2
  400670:       97ffff66        bl      400408 <_init>
  400674:       b4000194        cbz     x20, 4006a4 <__libc_csu_init+0x6c>
  400678:       f9000bb3        str     x19, [x29,#16]
  40067c:       d2800013        mov     x19, #0x0                       // #0
  400680:       f8737aa3        ldr     x3, [x21,x19,lsl #3]
  400684:       aa1803e2        mov     x2, x24
  400688:       aa1703e1        mov     x1, x23
  40068c:       2a1603e0        mov     w0, w22
  400690:       91000673        add     x19, x19, #0x1
  400694:       d63f0060        blr     x3
  400698:       eb13029f        cmp     x20, x19
  40069c:       54ffff21        b.ne    400680 <__libc_csu_init+0x48>
  4006a0:       f9400bb3        ldr     x19, [x29,#16]
  4006a4:       a941d7f4        ldp     x20, x21, [sp,#24]
  4006a8:       a942dff6        ldp     x22, x23, [sp,#40]
  4006ac:       f9401ff8        ldr     x24, [sp,#56]
  4006b0:       a8c47bfd        ldp     x29, x30, [sp],#64
  4006b4:       d65f03c0        ret

00000000004006b8 <__libc_csu_fini>:
  4006b8:       d65f03c0        ret

Disassembly of section .fini:

00000000004006bc <_fini>:
  4006bc:       a9bf7bfd        stp     x29, x30, [sp,#-16]!
  4006c0:       910003fd        mov     x29, sp
  4006c4:       a8c17bfd        ldp     x29, x30, [sp],#16
  4006c8:       d65f03c0        ret

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.