Where is GCC's __divti3 defined?

Question

For GCC's __int128 type

__int128 div128(__int128 a, __int128 b) {return a / b;}

GCC will generate a call to __divti3 instead of inline assembly. So where is this actually defined? Github's search only returns docs or tests that use it. I don't know if github's search function is terrible or the function is sneakily renamed somewhere.

For the close-voter who flagged this as "seeking recommendations for software": I implore you to think about what a recommendation and "opinion-based answer" is versus what I am asking. — qwr
– qwr, Commented Sep 5 at 15:12
@BoP what you have linked only has long long __divti3(). Plus GCC 3.4.0 is extremely old. — qwr
– qwr, Commented Sep 5 at 21:11
What's confusing is that I think long long is only 64-bit on my platform. But gcc.gnu.org/onlinedocs/gccint/Machine-Modes.html#index-TImode is for 16-byte (aka int128)? — qwr
– qwr, Commented Sep 5 at 21:17

Ctx · Accepted Answer · 2025-09-05 15:49:08Z

2

These kind of functions are used on platforms that do not have native support of a specific arithmetic function. It is provided by the gcc for a specific platform and usually located in the shared library libgcc_s (gcc's runtime library).

The symbol is generated in libgcc/libgcc2.c buried in some macro expansion:

CTYPE
CONCAT3(__div,MODE,3) (MTYPE a, MTYPE b, MTYPE c, MTYPE d)
{

edited Sep 5 at 15:49

answered Sep 5 at 15:15

Ctx

18.6k24 gold badges38 silver badges56 bronze badges

Sign up to request clarification or add additional context in comments.

9 Comments

qwr Sep 5 at 15:17

So it's not part of GCC's source? Who is providing it for Linux x86-64?

Ctx Sep 5 at 15:18

Yes, it's part of gcc, I thought I made that clear ;) I rephrased it a bit, perhaps its better understandable now

qwr Sep 5 at 15:22

So where is it located? I have tried searching through i386 config but I couldn't find it. I've found in GCC that simple functions are often renamed several times on the way to the actual internal implementation.

Ctx Sep 5 at 15:49

There you go...

0___________ Sep 5 at 18:45

github.com/gcc-mirror/gcc/blob/master/libgcc/libgcc2.c

qwr Sep 5 at 21:04

Are you sure it's that macro? The comments say it's for FP division.

qwr Sep 5 at 21:19

The MODE in that macro is for complex floating types? gcc.gnu.org/onlinedocs/gccint/Machine-Modes.html#index-QCmode

Ctx Sep 5 at 21:28

TI mode ist also mentioned there

|

qwr · Accepted Answer · 2025-09-08 14:15:40Z

GCC defines "Machine Modes", where TImode means 16-byte "tetra integer", i.e. 128-bit for platforms with 8-bit bytes (pretty much all of them).

In libgcc/libgcc2.h, 64-bit platforms have #define DWtype TItype, while for 32-bit platforms have #define DWtype DItype (their double-word is 64-bit), so don't support __int128.

Further along there is extern DWtype __divdi3 (DWtype, DWtype);

In libgcc/libgcc2.c, there is the following definition, which simplifies slightly to unsigned division:

#ifdef L_divdi3
DWtype
__divdi3 (DWtype u, DWtype v)
{
  Wtype c = 0;
  DWunion uu = {.ll = u};
  DWunion vv = {.ll = v};
  DWtype w;

  if (uu.s.high < 0)
    c = ~c,
    uu.ll = -uu.ll;
  if (vv.s.high < 0)
    c = ~c,
    vv.ll = -vv.ll;

  w = __udivmoddi4 (uu.ll, vv.ll, (UDWtype *) 0);
  if (c)
    w = -w;

  return w;
}
#endif

There are two implementations, depending on if hardware divide is available or not.

#ifdef L_udivmoddi4
#ifdef TARGET_HAS_NO_HW_DIVIDE

#if (defined (L_udivdi3) || defined (L_divdi3) || \
     defined (L_umoddi3) || defined (L_moddi3) || \
     defined (L_divmoddi4))
static inline __attribute__ ((__always_inline__))
#endif
UDWtype
__udivmoddi4 (UDWtype n, UDWtype d, UDWtype *rp)
{
  UDWtype q = 0, r = n, y = d;
  UWtype lz1, lz2, i, k;

  /* Implements align divisor shift dividend method. This algorithm
     aligns the divisor under the dividend and then perform number of
     test-subtract iterations which shift the dividend left. Number of
     iterations is k + 1 where k is the number of bit positions the
     divisor must be shifted left to align it under the dividend.
     quotient bits can be saved in the rightmost positions of the dividend
     as it shifts left on each test-subtract iteration. */

  if (y <= r)
    {
      lz1 = __builtin_clzll (d);
      lz2 = __builtin_clzll (n);

      k = lz1 - lz2;
      y = (y << k);

      /* Dividend can exceed 2 ^ (width - 1) - 1 but still be less than the
     aligned divisor. Normal iteration can drops the high order bit
     of the dividend. Therefore, first test-subtract iteration is a
     special case, saving its quotient bit in a separate location and
     not shifting the dividend. */
      if (r >= y)
    {
      r = r - y;
      q =  (1ULL << k);
    }

      if (k > 0)
    {
      y = y >> 1;

      /* k additional iterations where k regular test subtract shift
        dividend iterations are done.  */
      i = k;
      do
        {
          if (r >= y)
        r = ((r - y) << 1) + 1;
          else
        r =  (r << 1);
          i = i - 1;
        } while (i != 0);

      /* First quotient bit is combined with the quotient bits resulting
         from the k regular iterations.  */
      q = q + r;
      r = r >> k;
      q = q - (r << k);
    }
    }

  if (rp)
    *rp = r;
  return q;
}

#else

#if (defined (L_udivdi3) || defined (L_divdi3) || \
     defined (L_umoddi3) || defined (L_moddi3) || \
     defined (L_divmoddi4))
static inline __attribute__ ((__always_inline__))
#endif
UDWtype
__udivmoddi4 (UDWtype n, UDWtype d, UDWtype *rp)
{
  const DWunion nn = {.ll = n};
  const DWunion dd = {.ll = d};
  DWunion rr;
  UWtype d0, d1, n0, n1, n2;
  UWtype q0, q1;
  UWtype b, bm;

  d0 = dd.s.low;
  d1 = dd.s.high;
  n0 = nn.s.low;
  n1 = nn.s.high;

#if !UDIV_NEEDS_NORMALIZATION
  if (d1 == 0)
    {
      if (d0 > n1)
    {
      /* 0q = nn / 0D */

      udiv_qrnnd (q0, n0, n1, n0, d0);
      q1 = 0;

      /* Remainder in n0.  */
    }
      else
    {
      /* qq = NN / 0d */

      if (d0 == 0)
        d0 = 1 / d0;    /* Divide intentionally by zero.  */

      udiv_qrnnd (q1, n1, 0, n1, d0);
      udiv_qrnnd (q0, n0, n1, n0, d0);

      /* Remainder in n0.  */
    }

      if (rp != 0)
    {
      rr.s.low = n0;
      rr.s.high = 0;
      *rp = rr.ll;
    }
    }

#else /* UDIV_NEEDS_NORMALIZATION */

  if (d1 == 0)
    {
      if (d0 > n1)
    {
      /* 0q = nn / 0D */

      count_leading_zeros (bm, d0);

      if (bm != 0)
        {
          /* Normalize, i.e. make the most significant bit of the
         denominator set.  */

          d0 = d0 << bm;
          n1 = (n1 << bm) | (n0 >> (W_TYPE_SIZE - bm));
          n0 = n0 << bm;
        }

      udiv_qrnnd (q0, n0, n1, n0, d0);
      q1 = 0;

      /* Remainder in n0 >> bm.  */
    }
      else
    {
      /* qq = NN / 0d */

      if (d0 == 0)
        d0 = 1 / d0;    /* Divide intentionally by zero.  */

      count_leading_zeros (bm, d0);

      if (bm == 0)
        {
          /* From (n1 >= d0) /\ (the most significant bit of d0 is set),
         conclude (the most significant bit of n1 is set) /\ (the
         leading quotient digit q1 = 1).

         This special case is necessary, not an optimization.
         (Shifts counts of W_TYPE_SIZE are undefined.)  */

          n1 -= d0;
          q1 = 1;
        }
      else
        {
          /* Normalize.  */

          b = W_TYPE_SIZE - bm;

          d0 = d0 << bm;
          n2 = n1 >> b;
          n1 = (n1 << bm) | (n0 >> b);
          n0 = n0 << bm;

          udiv_qrnnd (q1, n1, n2, n1, d0);
        }

      /* n1 != d0...  */

      udiv_qrnnd (q0, n0, n1, n0, d0);

      /* Remainder in n0 >> bm.  */
    }

      if (rp != 0)
    {
      rr.s.low = n0 >> bm;
      rr.s.high = 0;
      *rp = rr.ll;
    }
    }
#endif /* UDIV_NEEDS_NORMALIZATION */

  else
    {
      if (d1 > n1)
    {
      /* 00 = nn / DD */

      q0 = 0;
      q1 = 0;

      /* Remainder in n1n0.  */
      if (rp != 0)
        {
          rr.s.low = n0;
          rr.s.high = n1;
          *rp = rr.ll;
        }
    }
      else
    {
      /* 0q = NN / dd */

      count_leading_zeros (bm, d1);
      if (bm == 0)
        {
          /* From (n1 >= d1) /\ (the most significant bit of d1 is set),
         conclude (the most significant bit of n1 is set) /\ (the
         quotient digit q0 = 0 or 1).

         This special case is necessary, not an optimization.  */

          /* The condition on the next line takes advantage of that
         n1 >= d1 (true due to program flow).  */
          if (n1 > d1 || n0 >= d0)
        {
          q0 = 1;
          sub_ddmmss (n1, n0, n1, n0, d1, d0);
        }
          else
        q0 = 0;

          q1 = 0;

          if (rp != 0)
        {
          rr.s.low = n0;
          rr.s.high = n1;
          *rp = rr.ll;
        }
        }
      else
        {
          UWtype m1, m0;
          /* Normalize.  */

          b = W_TYPE_SIZE - bm;

          d1 = (d1 << bm) | (d0 >> b);
          d0 = d0 << bm;
          n2 = n1 >> b;
          n1 = (n1 << bm) | (n0 >> b);
          n0 = n0 << bm;

          udiv_qrnnd (q0, n1, n2, n1, d1);
          umul_ppmm (m1, m0, q0, d0);

          if (m1 > n1 || (m1 == n1 && m0 > n0))
        {
          q0--;
          sub_ddmmss (m1, m0, m1, m0, d1, d0);
        }

          q1 = 0;

          /* Remainder in (n1n0 - m1m0) >> bm.  */
          if (rp != 0)
        {
          sub_ddmmss (n1, n0, n1, n0, m1, m0);
          rr.s.low = (n1 << b) | (n0 >> bm);
          rr.s.high = n1 >> bm;
          *rp = rr.ll;
        }
        }
    }
    }

  const DWunion ww = {{.low = q0, .high = q1}};
  return ww.ll;
}
#endif
#endif

In include/longlong.h

/* Define auxiliary asm macros.
...
   3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
   denominator) divides a UDWtype, composed by the UWtype integers
   HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
   in QUOTIENT and the remainder in REMAINDER.  HIGH_NUMERATOR must be less
   than DENOMINATOR for correct operation.  If, in addition, the most
   significant bit of DENOMINATOR must be 1, then the pre-processor symbol
   UDIV_NEEDS_NORMALIZATION is defined to 1.
...
   If any of these macros are left undefined for a particular CPU,
   C macros are used.  */

For x86-64 HW divide it uses divq (GCC syntax).

#if defined (__x86_64__) && W_TYPE_SIZE == 64
...
#define udiv_qrnnd(q, r, n1, n0, dv) \
  __asm__ ("div{q} %4"                          \
       : "=a" ((UDItype) (q)),                  \
         "=d" ((UDItype) (r))                   \
       : "0" ((UDItype) (n0)),                  \
         "1" ((UDItype) (n1)),                  \
         "rm" ((UDItype) (dv)))
#define count_leading_zeros(count, x)   ((count) = __builtin_clzll (x))
#define count_trailing_zeros(count, x)  ((count) = __builtin_ctzll (x))
#define UMUL_TIME 40
#define UDIV_TIME 40
#endif /* x86_64 */

Collectives™ on Stack Overflow

Where is GCC's __divti3 defined?

2 Answers 2

9 Comments

Comments

Your Answer

Hot Network Questions

Collectives™ on Stack Overflow

2 Answers 2

9 Comments

Comments

Your Answer

Sign up or log in

Post as a guest

Related