add third_party module

61b40fbb · limm · 32884939 · 61b40fbb · 61b40fbb · 61b40fbb
Commit 61b40fbb authored Jun 25, 2025 by limm
8 changed files
--- a/third_party/catch2/catch.hpp
+++ b/third_party/catch2/catch.hpp
--- a/third_party/clipper/License.txt
+++ b/third_party/clipper/License.txt
+Boost Software License - Version 1.0 - August 17th, 2003
+http://www.boost.org/LICENSE_1_0.txt
+
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
--- a/third_party/clipper/clipper.cpp
+++ b/third_party/clipper/clipper.cpp
+/*******************************************************************************
+*                                                                              *
+* Author    :  Angus Johnson                                                   *
+* Version   :  6.4.2                                                           *
+* Date      :  27 February 2017                                                *
+* Website   :  http://www.angusj.com                                           *
+* Copyright :  Angus Johnson 2010-2017                                         *
+*                                                                              *
+* License:                                                                     *
+* Use, modification & distribution is subject to Boost Software License Ver 1. *
+* http://www.boost.org/LICENSE_1_0.txt                                         *
+*                                                                              *
+* Attributions:                                                                *
+* The code in this library is an extension of Bala Vatti's clipping algorithm: *
+* "A generic solution to polygon clipping"                                     *
+* Communications of the ACM, Vol 35, Issue 7 (July 1992) pp 56-63.             *
+* http://portal.acm.org/citation.cfm?id=129906                                 *
+*                                                                              *
+* Computer graphics and geometric modeling: implementation and algorithms      *
+* By Max K. Agoston                                                            *
+* Springer; 1 edition (January 4, 2005)                                        *
+* http://books.google.com/books?q=vatti+clipping+agoston                       *
+*                                                                              *
+* See also:                                                                    *
+* "Polygon Offsetting by Computing Winding Numbers"                            *
+* Paper no. DETC2005-85513 pp. 565-575                                         *
+* ASME 2005 International Design Engineering Technical Conferences             *
+* and Computers and Information in Engineering Conference (IDETC/CIE2005)      *
+* September 24-28, 2005 , Long Beach, California, USA                          *
+* http://www.me.berkeley.edu/~mcmains/pubs/DAC05OffsetPolygon.pdf              *
+*                                                                              *
+*******************************************************************************/
+
+/*******************************************************************************
+*                                                                              *
+* This is a translation of the Delphi Clipper library and the naming style     *
+* used has retained a Delphi flavour.                                          *
+*                                                                              *
+*******************************************************************************/
+
+#include "clipper.hpp"
+#include <cmath>
+#include <vector>
+#include <algorithm>
+#include <stdexcept>
+#include <cstring>
+#include <cstdlib>
+#include <ostream>
+#include <functional>
+
+namespace ClipperLib {
+
+static double const pi = 3.141592653589793238;
+static double const two_pi = pi *2;
+static double const def_arc_tolerance = 0.25;
+
+enum Direction { dRightToLeft, dLeftToRight };
+
+static int const Unassigned = -1;  //edge not currently 'owning' a solution
+static int const Skip = -2;        //edge that would otherwise close a path
+
+#define HORIZONTAL (-1.0E+40)
+#define TOLERANCE (1.0e-20)
+#define NEAR_ZERO(val) (((val) > -TOLERANCE) && ((val) < TOLERANCE))
+
+struct TEdge {
+  IntPoint Bot;
+  IntPoint Curr; //current (updated for every new scanbeam)
+  IntPoint Top;
+  double Dx;
+  PolyType PolyTyp;
+  EdgeSide Side; //side only refers to current side of solution poly
+  int WindDelta; //1 or -1 depending on winding direction
+  int WindCnt;
+  int WindCnt2; //winding count of the opposite polytype
+  int OutIdx;
+  TEdge *Next;
+  TEdge *Prev;
+  TEdge *NextInLML;
+  TEdge *NextInAEL;
+  TEdge *PrevInAEL;
+  TEdge *NextInSEL;
+  TEdge *PrevInSEL;
+};
+
+struct IntersectNode {
+  TEdge          *Edge1;
+  TEdge          *Edge2;
+  IntPoint        Pt;
+};
+
+struct LocalMinimum {
+  cInt          Y;
+  TEdge        *LeftBound;
+  TEdge        *RightBound;
+};
+
+struct OutPt;
+
+//OutRec: contains a path in the clipping solution. Edges in the AEL will
+//carry a pointer to an OutRec when they are part of the clipping solution.
+struct OutRec {
+  int       Idx;
+  bool      IsHole;
+  bool      IsOpen;
+  OutRec   *FirstLeft;  //see comments in clipper.pas
+  PolyNode *PolyNd;
+  OutPt    *Pts;
+  OutPt    *BottomPt;
+};
+
+struct OutPt {
+  int       Idx;
+  IntPoint  Pt;
+  OutPt    *Next;
+  OutPt    *Prev;
+};
+
+struct Join {
+  OutPt    *OutPt1;
+  OutPt    *OutPt2;
+  IntPoint  OffPt;
+};
+
+struct LocMinSorter
+{
+  inline bool operator()(const LocalMinimum& locMin1, const LocalMinimum& locMin2)
+  {
+    return locMin2.Y < locMin1.Y;
+  }
+};
+
+//------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
+
+inline cInt Round(double val)
+{
+  if ((val < 0)) return static_cast<cInt>(val - 0.5);
+  else return static_cast<cInt>(val + 0.5);
+}
+//------------------------------------------------------------------------------
+
+inline cInt Abs(cInt val)
+{
+  return val < 0 ? -val : val;
+}
+
+//------------------------------------------------------------------------------
+// PolyTree methods ...
+//------------------------------------------------------------------------------
+
+void PolyTree::Clear()
+{
+    for (PolyNodes::size_type i = 0; i < AllNodes.size(); ++i)
+      delete AllNodes[i];
+    AllNodes.resize(0);
+    Childs.resize(0);
+}
+//------------------------------------------------------------------------------
+
+PolyNode* PolyTree::GetFirst() const
+{
+  if (!Childs.empty())
+      return Childs[0];
+  else
+      return 0;
+}
+//------------------------------------------------------------------------------
+
+int PolyTree::Total() const
+{
+  int result = (int)AllNodes.size();
+  //with negative offsets, ignore the hidden outer polygon ...
+  if (result > 0 && Childs[0] != AllNodes[0]) result--;
+  return result;
+}
+
+//------------------------------------------------------------------------------
+// PolyNode methods ...
+//------------------------------------------------------------------------------
+
+PolyNode::PolyNode(): Parent(0), Index(0), m_IsOpen(false)
+{
+}
+//------------------------------------------------------------------------------
+
+int PolyNode::ChildCount() const
+{
+  return (int)Childs.size();
+}
+//------------------------------------------------------------------------------
+
+void PolyNode::AddChild(PolyNode& child)
+{
+  unsigned cnt = (unsigned)Childs.size();
+  Childs.push_back(&child);
+  child.Parent = this;
+  child.Index = cnt;
+}
+//------------------------------------------------------------------------------
+
+PolyNode* PolyNode::GetNext() const
+{
+  if (!Childs.empty())
+      return Childs[0];
+  else
+      return GetNextSiblingUp();
+}
+//------------------------------------------------------------------------------
+
+PolyNode* PolyNode::GetNextSiblingUp() const
+{
+  if (!Parent) //protects against PolyTree.GetNextSiblingUp()
+      return 0;
+  else if (Index == Parent->Childs.size() - 1)
+      return Parent->GetNextSiblingUp();
+  else
+      return Parent->Childs[Index + 1];
+}
+//------------------------------------------------------------------------------
+
+bool PolyNode::IsHole() const
+{
+  bool result = true;
+  PolyNode* node = Parent;
+  while (node)
+  {
+      result = !result;
+      node = node->Parent;
+  }
+  return result;
+}
+//------------------------------------------------------------------------------
+
+bool PolyNode::IsOpen() const
+{
+  return m_IsOpen;
+}
+//------------------------------------------------------------------------------
+
+#ifndef use_int32
+
+//------------------------------------------------------------------------------
+// Int128 class (enables safe math on signed 64bit integers)
+// eg Int128 val1((long64)9223372036854775807); //ie 2^63 -1
+//    Int128 val2((long64)9223372036854775807);
+//    Int128 val3 = val1 * val2;
+//    val3.AsString => "85070591730234615847396907784232501249" (8.5e+37)
+//------------------------------------------------------------------------------
+
+class Int128
+{
+  public:
+    ulong64 lo;
+    long64 hi;
+
+    Int128(long64 _lo = 0)
+    {
+      lo = (ulong64)_lo;
+      if (_lo < 0)  hi = -1; else hi = 0;
+    }
+
+
+    Int128(const Int128 &val): lo(val.lo), hi(val.hi){}
+
+    Int128(const long64& _hi, const ulong64& _lo): lo(_lo), hi(_hi){}
+
+    Int128& operator = (const long64 &val)
+    {
+      lo = (ulong64)val;
+      if (val < 0) hi = -1; else hi = 0;
+      return *this;
+    }
+
+    bool operator == (const Int128 &val) const
+      {return (hi == val.hi && lo == val.lo);}
+
+    bool operator != (const Int128 &val) const
+      { return !(*this == val);}
+
+    bool operator > (const Int128 &val) const
+    {
+      if (hi != val.hi)
+        return hi > val.hi;
+      else
+        return lo > val.lo;
+    }
+
+    bool operator < (const Int128 &val) const
+    {
+      if (hi != val.hi)
+        return hi < val.hi;
+      else
+        return lo < val.lo;
+    }
+
+    bool operator >= (const Int128 &val) const
+      { return !(*this < val);}
+
+    bool operator <= (const Int128 &val) const
+      { return !(*this > val);}
+
+    Int128& operator += (const Int128 &rhs)
+    {
+      hi += rhs.hi;
+      lo += rhs.lo;
+      if (lo < rhs.lo) hi++;
+      return *this;
+    }
+
+    Int128 operator + (const Int128 &rhs) const
+    {
+      Int128 result(*this);
+      result+= rhs;
+      return result;
+    }
+
+    Int128& operator -= (const Int128 &rhs)
+    {
+      *this += -rhs;
+      return *this;
+    }
+
+    Int128 operator - (const Int128 &rhs) const
+    {
+      Int128 result(*this);
+      result -= rhs;
+      return result;
+    }
+
+    Int128 operator-() const //unary negation
+    {
+      if (lo == 0)
+        return Int128(-hi, 0);
+      else
+        return Int128(~hi, ~lo + 1);
+    }
+
+    operator double() const
+    {
+      const double shift64 = 18446744073709551616.0; //2^64
+      if (hi < 0)
+      {
+        if (lo == 0) return (double)hi * shift64;
+        else return -(double)(~lo + ~hi * shift64);
+      }
+      else
+        return (double)(lo + hi * shift64);
+    }
+
+};
+//------------------------------------------------------------------------------
+
+Int128 Int128Mul (long64 lhs, long64 rhs)
+{
+  bool negate = (lhs < 0) != (rhs < 0);
+
+  if (lhs < 0) lhs = -lhs;
+  ulong64 int1Hi = ulong64(lhs) >> 32;
+  ulong64 int1Lo = ulong64(lhs & 0xFFFFFFFF);
+
+  if (rhs < 0) rhs = -rhs;
+  ulong64 int2Hi = ulong64(rhs) >> 32;
+  ulong64 int2Lo = ulong64(rhs & 0xFFFFFFFF);
+
+  //nb: see comments in clipper.pas
+  ulong64 a = int1Hi * int2Hi;
+  ulong64 b = int1Lo * int2Lo;
+  ulong64 c = int1Hi * int2Lo + int1Lo * int2Hi;
+
+  Int128 tmp;
+  tmp.hi = long64(a + (c >> 32));
+  tmp.lo = long64(c << 32);
+  tmp.lo += long64(b);
+  if (tmp.lo < b) tmp.hi++;
+  if (negate) tmp = -tmp;
+  return tmp;
+};
+#endif
+
+//------------------------------------------------------------------------------
+// Miscellaneous global functions
+//------------------------------------------------------------------------------
+
+bool Orientation(const Path &poly)
+{
+    return Area(poly) >= 0;
+}
+//------------------------------------------------------------------------------
+
+double Area(const Path &poly)
+{
+  int size = (int)poly.size();
+  if (size < 3) return 0;
+
+  double a = 0;
+  for (int i = 0, j = size -1; i < size; ++i)
+  {
+    a += ((double)poly[j].X + poly[i].X) * ((double)poly[j].Y - poly[i].Y);
+    j = i;
+  }
+  return -a * 0.5;
+}
+//------------------------------------------------------------------------------
+
+double Area(const OutPt *op)
+{
+  const OutPt *startOp = op;
+  if (!op) return 0;
+  double a = 0;
+  do {
+    a +=  (double)(op->Prev->Pt.X + op->Pt.X) * (double)(op->Prev->Pt.Y - op->Pt.Y);
+    op = op->Next;
+  } while (op != startOp);
+  return a * 0.5;
+}
+//------------------------------------------------------------------------------
+
+double Area(const OutRec &outRec)
+{
+  return Area(outRec.Pts);
+}
+//------------------------------------------------------------------------------
+
+bool PointIsVertex(const IntPoint &Pt, OutPt *pp)
+{
+  OutPt *pp2 = pp;
+  do
+  {
+    if (pp2->Pt == Pt) return true;
+    pp2 = pp2->Next;
+  }
+  while (pp2 != pp);
+  return false;
+}
+//------------------------------------------------------------------------------
+
+//See "The Point in Polygon Problem for Arbitrary Polygons" by Hormann & Agathos
+//http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.88.5498&rep=rep1&type=pdf
+int PointInPolygon(const IntPoint &pt, const Path &path)
+{
+  //returns 0 if false, +1 if true, -1 if pt ON polygon boundary
+  int result = 0;
+  size_t cnt = path.size();
+  if (cnt < 3) return 0;
+  IntPoint ip = path[0];
+  for(size_t i = 1; i <= cnt; ++i)
+  {
+    IntPoint ipNext = (i == cnt ? path[0] : path[i]);
+    if (ipNext.Y == pt.Y)
+    {
+        if ((ipNext.X == pt.X) || (ip.Y == pt.Y &&
+          ((ipNext.X > pt.X) == (ip.X < pt.X)))) return -1;
+    }
+    if ((ip.Y < pt.Y) != (ipNext.Y < pt.Y))
+    {
+      if (ip.X >= pt.X)
+      {
+        if (ipNext.X > pt.X) result = 1 - result;
+        else
+        {
+          double d = (double)(ip.X - pt.X) * (ipNext.Y - pt.Y) -
+            (double)(ipNext.X - pt.X) * (ip.Y - pt.Y);
+          if (!d) return -1;
+          if ((d > 0) == (ipNext.Y > ip.Y)) result = 1 - result;
+        }
+      } else
+      {
+        if (ipNext.X > pt.X)
+        {
+          double d = (double)(ip.X - pt.X) * (ipNext.Y - pt.Y) -
+            (double)(ipNext.X - pt.X) * (ip.Y - pt.Y);
+          if (!d) return -1;
+          if ((d > 0) == (ipNext.Y > ip.Y)) result = 1 - result;
+        }
+      }
+    }
+    ip = ipNext;
+  }
+  return result;
+}
+//------------------------------------------------------------------------------
+
+int PointInPolygon (const IntPoint &pt, OutPt *op)
+{
+  //returns 0 if false, +1 if true, -1 if pt ON polygon boundary
+  int result = 0;
+  OutPt* startOp = op;
+  for(;;)
+  {
+    if (op->Next->Pt.Y == pt.Y)
+    {
+        if ((op->Next->Pt.X == pt.X) || (op->Pt.Y == pt.Y &&
+          ((op->Next->Pt.X > pt.X) == (op->Pt.X < pt.X)))) return -1;
+    }
+    if ((op->Pt.Y < pt.Y) != (op->Next->Pt.Y < pt.Y))
+    {
+      if (op->Pt.X >= pt.X)
+      {
+        if (op->Next->Pt.X > pt.X) result = 1 - result;
+        else
+        {
+          double d = (double)(op->Pt.X - pt.X) * (op->Next->Pt.Y - pt.Y) -
+            (double)(op->Next->Pt.X - pt.X) * (op->Pt.Y - pt.Y);
+          if (!d) return -1;
+          if ((d > 0) == (op->Next->Pt.Y > op->Pt.Y)) result = 1 - result;
+        }
+      } else
+      {
+        if (op->Next->Pt.X > pt.X)
+        {
+          double d = (double)(op->Pt.X - pt.X) * (op->Next->Pt.Y - pt.Y) -
+            (double)(op->Next->Pt.X - pt.X) * (op->Pt.Y - pt.Y);
+          if (!d) return -1;
+          if ((d > 0) == (op->Next->Pt.Y > op->Pt.Y)) result = 1 - result;
+        }
+      }
+    }
+    op = op->Next;
+    if (startOp == op) break;
+  }
+  return result;
+}
+//------------------------------------------------------------------------------
+
+bool Poly2ContainsPoly1(OutPt *OutPt1, OutPt *OutPt2)
+{
+  OutPt* op = OutPt1;
+  do
+  {
+    //nb: PointInPolygon returns 0 if false, +1 if true, -1 if pt on polygon
+    int res = PointInPolygon(op->Pt, OutPt2);
+    if (res >= 0) return res > 0;
+    op = op->Next;
+  }
+  while (op != OutPt1);
+  return true;
+}
+//----------------------------------------------------------------------
+
+bool SlopesEqual(const TEdge &e1, const TEdge &e2, bool UseFullInt64Range)
+{
+#ifndef use_int32
+  if (UseFullInt64Range)
+    return Int128Mul(e1.Top.Y - e1.Bot.Y, e2.Top.X - e2.Bot.X) ==
+    Int128Mul(e1.Top.X - e1.Bot.X, e2.Top.Y - e2.Bot.Y);
+  else
+#endif
+    return (e1.Top.Y - e1.Bot.Y) * (e2.Top.X - e2.Bot.X) ==
+    (e1.Top.X - e1.Bot.X) * (e2.Top.Y - e2.Bot.Y);
+}
+//------------------------------------------------------------------------------
+
+bool SlopesEqual(const IntPoint pt1, const IntPoint pt2,
+  const IntPoint pt3, bool UseFullInt64Range)
+{
+#ifndef use_int32
+  if (UseFullInt64Range)
+    return Int128Mul(pt1.Y-pt2.Y, pt2.X-pt3.X) == Int128Mul(pt1.X-pt2.X, pt2.Y-pt3.Y);
+  else
+#endif
+    return (pt1.Y-pt2.Y)*(pt2.X-pt3.X) == (pt1.X-pt2.X)*(pt2.Y-pt3.Y);
+}
+//------------------------------------------------------------------------------
+
+bool SlopesEqual(const IntPoint pt1, const IntPoint pt2,
+  const IntPoint pt3, const IntPoint pt4, bool UseFullInt64Range)
+{
+#ifndef use_int32
+  if (UseFullInt64Range)
+    return Int128Mul(pt1.Y-pt2.Y, pt3.X-pt4.X) == Int128Mul(pt1.X-pt2.X, pt3.Y-pt4.Y);
+  else
+#endif
+    return (pt1.Y-pt2.Y)*(pt3.X-pt4.X) == (pt1.X-pt2.X)*(pt3.Y-pt4.Y);
+}
+//------------------------------------------------------------------------------
+
+inline bool IsHorizontal(TEdge &e)
+{
+  return e.Dx == HORIZONTAL;
+}
+//------------------------------------------------------------------------------
+
+inline double GetDx(const IntPoint pt1, const IntPoint pt2)
+{
+  return (pt1.Y == pt2.Y) ?
+    HORIZONTAL : (double)(pt2.X - pt1.X) / (pt2.Y - pt1.Y);
+}
+//---------------------------------------------------------------------------
+
+inline void SetDx(TEdge &e)
+{
+  cInt dy  = (e.Top.Y - e.Bot.Y);
+  if (dy == 0) e.Dx = HORIZONTAL;
+  else e.Dx = (double)(e.Top.X - e.Bot.X) / dy;
+}
+//---------------------------------------------------------------------------
+
+inline void SwapSides(TEdge &Edge1, TEdge &Edge2)
+{
+  EdgeSide Side =  Edge1.Side;
+  Edge1.Side = Edge2.Side;
+  Edge2.Side = Side;
+}
+//------------------------------------------------------------------------------
+
+inline void SwapPolyIndexes(TEdge &Edge1, TEdge &Edge2)
+{
+  int OutIdx =  Edge1.OutIdx;
+  Edge1.OutIdx = Edge2.OutIdx;
+  Edge2.OutIdx = OutIdx;
+}
+//------------------------------------------------------------------------------
+
+inline cInt TopX(TEdge &edge, const cInt currentY)
+{
+  return ( currentY == edge.Top.Y ) ?
+    edge.Top.X : edge.Bot.X + Round(edge.Dx *(currentY - edge.Bot.Y));
+}
+//------------------------------------------------------------------------------
+
+void IntersectPoint(TEdge &Edge1, TEdge &Edge2, IntPoint &ip)
+{
+#ifdef use_xyz
+  ip.Z = 0;
+#endif
+
+  double b1, b2;
+  if (Edge1.Dx == Edge2.Dx)
+  {
+    ip.Y = Edge1.Curr.Y;
+    ip.X = TopX(Edge1, ip.Y);
+    return;
+  }
+  else if (Edge1.Dx == 0)
+  {
+    ip.X = Edge1.Bot.X;
+    if (IsHorizontal(Edge2))
+      ip.Y = Edge2.Bot.Y;
+    else
+    {
+      b2 = Edge2.Bot.Y - (Edge2.Bot.X / Edge2.Dx);
+      ip.Y = Round(ip.X / Edge2.Dx + b2);
+    }
+  }
+  else if (Edge2.Dx == 0)
+  {
+    ip.X = Edge2.Bot.X;
+    if (IsHorizontal(Edge1))
+      ip.Y = Edge1.Bot.Y;
+    else
+    {
+      b1 = Edge1.Bot.Y - (Edge1.Bot.X / Edge1.Dx);
+      ip.Y = Round(ip.X / Edge1.Dx + b1);
+    }
+  }
+  else
+  {
+    b1 = Edge1.Bot.X - Edge1.Bot.Y * Edge1.Dx;
+    b2 = Edge2.Bot.X - Edge2.Bot.Y * Edge2.Dx;
+    double q = (b2-b1) / (Edge1.Dx - Edge2.Dx);
+    ip.Y = Round(q);
+    if (std::fabs(Edge1.Dx) < std::fabs(Edge2.Dx))
+      ip.X = Round(Edge1.Dx * q + b1);
+    else
+      ip.X = Round(Edge2.Dx * q + b2);
+  }
+
+  if (ip.Y < Edge1.Top.Y || ip.Y < Edge2.Top.Y)
+  {
+    if (Edge1.Top.Y > Edge2.Top.Y)
+      ip.Y = Edge1.Top.Y;
+    else
+      ip.Y = Edge2.Top.Y;
+    if (std::fabs(Edge1.Dx) < std::fabs(Edge2.Dx))
+      ip.X = TopX(Edge1, ip.Y);
+    else
+      ip.X = TopX(Edge2, ip.Y);
+  }
+  //finally, don't allow 'ip' to be BELOW curr.Y (ie bottom of scanbeam) ...
+  if (ip.Y > Edge1.Curr.Y)
+  {
+    ip.Y = Edge1.Curr.Y;
+    //use the more vertical edge to derive X ...
+    if (std::fabs(Edge1.Dx) > std::fabs(Edge2.Dx))
+      ip.X = TopX(Edge2, ip.Y); else
+      ip.X = TopX(Edge1, ip.Y);
+  }
+}
+//------------------------------------------------------------------------------
+
+void ReversePolyPtLinks(OutPt *pp)
+{
+  if (!pp) return;
+  OutPt *pp1, *pp2;
+  pp1 = pp;
+  do {
+  pp2 = pp1->Next;
+  pp1->Next = pp1->Prev;
+  pp1->Prev = pp2;
+  pp1 = pp2;
+  } while( pp1 != pp );
+}
+//------------------------------------------------------------------------------
+
+void DisposeOutPts(OutPt*& pp)
+{
+  if (pp == 0) return;
+    pp->Prev->Next = 0;
+  while( pp )
+  {
+    OutPt *tmpPp = pp;
+    pp = pp->Next;
+    delete tmpPp;
+  }
+}
+//------------------------------------------------------------------------------
+
+inline void InitEdge(TEdge* e, TEdge* eNext, TEdge* ePrev, const IntPoint& Pt)
+{
+  std::memset(e, 0, sizeof(TEdge));
+  e->Next = eNext;
+  e->Prev = ePrev;
+  e->Curr = Pt;
+  e->OutIdx = Unassigned;
+}
+//------------------------------------------------------------------------------
+
+void InitEdge2(TEdge& e, PolyType Pt)
+{
+  if (e.Curr.Y >= e.Next->Curr.Y)
+  {
+    e.Bot = e.Curr;
+    e.Top = e.Next->Curr;
+  } else
+  {
+    e.Top = e.Curr;
+    e.Bot = e.Next->Curr;
+  }
+  SetDx(e);
+  e.PolyTyp = Pt;
+}
+//------------------------------------------------------------------------------
+
+TEdge* RemoveEdge(TEdge* e)
+{
+  //removes e from double_linked_list (but without removing from memory)
+  e->Prev->Next = e->Next;
+  e->Next->Prev = e->Prev;
+  TEdge* result = e->Next;
+  e->Prev = 0; //flag as removed (see ClipperBase.Clear)
+  return result;
+}
+//------------------------------------------------------------------------------
+
+inline void ReverseHorizontal(TEdge &e)
+{
+  //swap horizontal edges' Top and Bottom x's so they follow the natural
+  //progression of the bounds - ie so their xbots will align with the
+  //adjoining lower edge. [Helpful in the ProcessHorizontal() method.]
+  std::swap(e.Top.X, e.Bot.X);
+#ifdef use_xyz
+  std::swap(e.Top.Z, e.Bot.Z);
+#endif
+}
+//------------------------------------------------------------------------------
+
+void SwapPoints(IntPoint &pt1, IntPoint &pt2)
+{
+  IntPoint tmp = pt1;
+  pt1 = pt2;
+  pt2 = tmp;
+}
+//------------------------------------------------------------------------------
+
+bool GetOverlapSegment(IntPoint pt1a, IntPoint pt1b, IntPoint pt2a,
+  IntPoint pt2b, IntPoint &pt1, IntPoint &pt2)
+{
+  //precondition: segments are Collinear.
+  if (Abs(pt1a.X - pt1b.X) > Abs(pt1a.Y - pt1b.Y))
+  {
+    if (pt1a.X > pt1b.X) SwapPoints(pt1a, pt1b);
+    if (pt2a.X > pt2b.X) SwapPoints(pt2a, pt2b);
+    if (pt1a.X > pt2a.X) pt1 = pt1a; else pt1 = pt2a;
+    if (pt1b.X < pt2b.X) pt2 = pt1b; else pt2 = pt2b;
+    return pt1.X < pt2.X;
+  } else
+  {
+    if (pt1a.Y < pt1b.Y) SwapPoints(pt1a, pt1b);
+    if (pt2a.Y < pt2b.Y) SwapPoints(pt2a, pt2b);
+    if (pt1a.Y < pt2a.Y) pt1 = pt1a; else pt1 = pt2a;
+    if (pt1b.Y > pt2b.Y) pt2 = pt1b; else pt2 = pt2b;
+    return pt1.Y > pt2.Y;
+  }
+}
+//------------------------------------------------------------------------------
+
+bool FirstIsBottomPt(const OutPt* btmPt1, const OutPt* btmPt2)
+{
+  OutPt *p = btmPt1->Prev;
+  while ((p->Pt == btmPt1->Pt) && (p != btmPt1)) p = p->Prev;
+  double dx1p = std::fabs(GetDx(btmPt1->Pt, p->Pt));
+  p = btmPt1->Next;
+  while ((p->Pt == btmPt1->Pt) && (p != btmPt1)) p = p->Next;
+  double dx1n = std::fabs(GetDx(btmPt1->Pt, p->Pt));
+
+  p = btmPt2->Prev;
+  while ((p->Pt == btmPt2->Pt) && (p != btmPt2)) p = p->Prev;
+  double dx2p = std::fabs(GetDx(btmPt2->Pt, p->Pt));
+  p = btmPt2->Next;
+  while ((p->Pt == btmPt2->Pt) && (p != btmPt2)) p = p->Next;
+  double dx2n = std::fabs(GetDx(btmPt2->Pt, p->Pt));
+
+  if (std::max(dx1p, dx1n) == std::max(dx2p, dx2n) &&
+    std::min(dx1p, dx1n) == std::min(dx2p, dx2n))
+      return Area(btmPt1) > 0; //if otherwise identical use orientation
+  else
+    return (dx1p >= dx2p && dx1p >= dx2n) || (dx1n >= dx2p && dx1n >= dx2n);
+}
+//------------------------------------------------------------------------------
+
+OutPt* GetBottomPt(OutPt *pp)
+{
+  OutPt* dups = 0;
+  OutPt* p = pp->Next;
+  while (p != pp)
+  {
+    if (p->Pt.Y > pp->Pt.Y)
+    {
+      pp = p;
+      dups = 0;
+    }
+    else if (p->Pt.Y == pp->Pt.Y && p->Pt.X <= pp->Pt.X)
+    {
+      if (p->Pt.X < pp->Pt.X)
+      {
+        dups = 0;
+        pp = p;
+      } else
+      {
+        if (p->Next != pp && p->Prev != pp) dups = p;
+      }
+    }
+    p = p->Next;
+  }
+  if (dups)
+  {
+    //there appears to be at least 2 vertices at BottomPt so ...
+    while (dups != p)
+    {
+      if (!FirstIsBottomPt(p, dups)) pp = dups;
+      dups = dups->Next;
+      while (dups->Pt != pp->Pt) dups = dups->Next;
+    }
+  }
+  return pp;
+}
+//------------------------------------------------------------------------------
+
+bool Pt2IsBetweenPt1AndPt3(const IntPoint pt1,
+  const IntPoint pt2, const IntPoint pt3)
+{
+  if ((pt1 == pt3) || (pt1 == pt2) || (pt3 == pt2))
+    return false;
+  else if (pt1.X != pt3.X)
+    return (pt2.X > pt1.X) == (pt2.X < pt3.X);
+  else
+    return (pt2.Y > pt1.Y) == (pt2.Y < pt3.Y);
+}
+//------------------------------------------------------------------------------
+
+bool HorzSegmentsOverlap(cInt seg1a, cInt seg1b, cInt seg2a, cInt seg2b)
+{
+  if (seg1a > seg1b) std::swap(seg1a, seg1b);
+  if (seg2a > seg2b) std::swap(seg2a, seg2b);
+  return (seg1a < seg2b) && (seg2a < seg1b);
+}
+
+//------------------------------------------------------------------------------
+// ClipperBase class methods ...
+//------------------------------------------------------------------------------
+
+ClipperBase::ClipperBase() //constructor
+{
+  m_CurrentLM = m_MinimaList.begin(); //begin() == end() here
+  m_UseFullRange = false;
+}
+//------------------------------------------------------------------------------
+
+ClipperBase::~ClipperBase() //destructor
+{
+  Clear();
+}
+//------------------------------------------------------------------------------
+
+void RangeTest(const IntPoint& Pt, bool& useFullRange)
+{
+  if (useFullRange)
+  {
+    if (Pt.X > hiRange || Pt.Y > hiRange || -Pt.X > hiRange || -Pt.Y > hiRange)
+      throw clipperException("Coordinate outside allowed range");
+  }
+  else if (Pt.X > loRange|| Pt.Y > loRange || -Pt.X > loRange || -Pt.Y > loRange)
+  {
+    useFullRange = true;
+    RangeTest(Pt, useFullRange);
+  }
+}
+//------------------------------------------------------------------------------
+
+TEdge* FindNextLocMin(TEdge* E)
+{
+  for (;;)
+  {
+    while (E->Bot != E->Prev->Bot || E->Curr == E->Top) E = E->Next;
+    if (!IsHorizontal(*E) && !IsHorizontal(*E->Prev)) break;
+    while (IsHorizontal(*E->Prev)) E = E->Prev;
+    TEdge* E2 = E;
+    while (IsHorizontal(*E)) E = E->Next;
+    if (E->Top.Y == E->Prev->Bot.Y) continue; //ie just an intermediate horz.
+    if (E2->Prev->Bot.X < E->Bot.X) E = E2;
+    break;
+  }
+  return E;
+}
+//------------------------------------------------------------------------------
+
+TEdge* ClipperBase::ProcessBound(TEdge* E, bool NextIsForward)
+{
+  TEdge *Result = E;
+  TEdge *Horz = 0;
+
+  if (E->OutIdx == Skip)
+  {
+    //if edges still remain in the current bound beyond the skip edge then
+    //create another LocMin and call ProcessBound once more
+    if (NextIsForward)
+    {
+      while (E->Top.Y == E->Next->Bot.Y) E = E->Next;
+      //don't include top horizontals when parsing a bound a second time,
+      //they will be contained in the opposite bound ...
+      while (E != Result && IsHorizontal(*E)) E = E->Prev;
+    }
+    else
+    {
+      while (E->Top.Y == E->Prev->Bot.Y) E = E->Prev;
+      while (E != Result && IsHorizontal(*E)) E = E->Next;
+    }
+
+    if (E == Result)
+    {
+      if (NextIsForward) Result = E->Next;
+      else Result = E->Prev;
+    }
+    else
+    {
+      //there are more edges in the bound beyond result starting with E
+      if (NextIsForward)
+        E = Result->Next;
+      else
+        E = Result->Prev;
+      MinimaList::value_type locMin;
+      locMin.Y = E->Bot.Y;
+      locMin.LeftBound = 0;
+      locMin.RightBound = E;
+      E->WindDelta = 0;
+      Result = ProcessBound(E, NextIsForward);
+      m_MinimaList.push_back(locMin);
+    }
+    return Result;
+  }
+
+  TEdge *EStart;
+
+  if (IsHorizontal(*E))
+  {
+    //We need to be careful with open paths because this may not be a
+    //true local minima (ie E may be following a skip edge).
+    //Also, consecutive horz. edges may start heading left before going right.
+    if (NextIsForward)
+      EStart = E->Prev;
+    else
+      EStart = E->Next;
+    if (IsHorizontal(*EStart)) //ie an adjoining horizontal skip edge
+      {
+        if (EStart->Bot.X != E->Bot.X && EStart->Top.X != E->Bot.X)
+          ReverseHorizontal(*E);
+      }
+      else if (EStart->Bot.X != E->Bot.X)
+        ReverseHorizontal(*E);
+  }
+
+  EStart = E;
+  if (NextIsForward)
+  {
+    while (Result->Top.Y == Result->Next->Bot.Y && Result->Next->OutIdx != Skip)
+      Result = Result->Next;
+    if (IsHorizontal(*Result) && Result->Next->OutIdx != Skip)
+    {
+      //nb: at the top of a bound, horizontals are added to the bound
+      //only when the preceding edge attaches to the horizontal's left vertex
+      //unless a Skip edge is encountered when that becomes the top divide
+      Horz = Result;
+      while (IsHorizontal(*Horz->Prev)) Horz = Horz->Prev;
+      if (Horz->Prev->Top.X > Result->Next->Top.X) Result = Horz->Prev;
+    }
+    while (E != Result)
+    {
+      E->NextInLML = E->Next;
+      if (IsHorizontal(*E) && E != EStart &&
+        E->Bot.X != E->Prev->Top.X) ReverseHorizontal(*E);
+      E = E->Next;
+    }
+    if (IsHorizontal(*E) && E != EStart && E->Bot.X != E->Prev->Top.X)
+      ReverseHorizontal(*E);
+    Result = Result->Next; //move to the edge just beyond current bound
+  } else
+  {
+    while (Result->Top.Y == Result->Prev->Bot.Y && Result->Prev->OutIdx != Skip)
+      Result = Result->Prev;
+    if (IsHorizontal(*Result) && Result->Prev->OutIdx != Skip)
+    {
+      Horz = Result;
+      while (IsHorizontal(*Horz->Next)) Horz = Horz->Next;
+      if (Horz->Next->Top.X == Result->Prev->Top.X ||
+          Horz->Next->Top.X > Result->Prev->Top.X) Result = Horz->Next;
+    }
+
+    while (E != Result)
+    {
+      E->NextInLML = E->Prev;
+      if (IsHorizontal(*E) && E != EStart && E->Bot.X != E->Next->Top.X)
+        ReverseHorizontal(*E);
+      E = E->Prev;
+    }
+    if (IsHorizontal(*E) && E != EStart && E->Bot.X != E->Next->Top.X)
+      ReverseHorizontal(*E);
+    Result = Result->Prev; //move to the edge just beyond current bound
+  }
+
+  return Result;
+}
+//------------------------------------------------------------------------------
+
+bool ClipperBase::AddPath(const Path &pg, PolyType PolyTyp, bool Closed)
+{
+#ifdef use_lines
+  if (!Closed && PolyTyp == ptClip)
+    throw clipperException("AddPath: Open paths must be subject.");
+#else
+  if (!Closed)
+    throw clipperException("AddPath: Open paths have been disabled.");
+#endif
+
+  int highI = (int)pg.size() -1;
+  if (Closed) while (highI > 0 && (pg[highI] == pg[0])) --highI;
+  while (highI > 0 && (pg[highI] == pg[highI -1])) --highI;
+  if ((Closed && highI < 2) || (!Closed && highI < 1)) return false;
+
+  //create a new edge array ...
+  TEdge *edges = new TEdge [highI +1];
+
+  bool IsFlat = true;
+  //1. Basic (first) edge initialization ...
+  try
+  {
+    edges[1].Curr = pg[1];
+    RangeTest(pg[0], m_UseFullRange);
+    RangeTest(pg[highI], m_UseFullRange);
+    InitEdge(&edges[0], &edges[1], &edges[highI], pg[0]);
+    InitEdge(&edges[highI], &edges[0], &edges[highI-1], pg[highI]);
+    for (int i = highI - 1; i >= 1; --i)
+    {
+      RangeTest(pg[i], m_UseFullRange);
+      InitEdge(&edges[i], &edges[i+1], &edges[i-1], pg[i]);
+    }
+  }
+  catch(...)
+  {
+    delete [] edges;
+    throw; //range test fails
+  }
+  TEdge *eStart = &edges[0];
+
+  //2. Remove duplicate vertices, and (when closed) collinear edges ...
+  TEdge *E = eStart, *eLoopStop = eStart;
+  for (;;)
+  {
+    //nb: allows matching start and end points when not Closed ...
+    if (E->Curr == E->Next->Curr && (Closed || E->Next != eStart))
+    {
+      if (E == E->Next) break;
+      if (E == eStart) eStart = E->Next;
+      E = RemoveEdge(E);
+      eLoopStop = E;
+      continue;
+    }
+    if (E->Prev == E->Next)
+      break; //only two vertices
+    else if (Closed &&
+      SlopesEqual(E->Prev->Curr, E->Curr, E->Next->Curr, m_UseFullRange) &&
+      (!m_PreserveCollinear ||
+      !Pt2IsBetweenPt1AndPt3(E->Prev->Curr, E->Curr, E->Next->Curr)))
+    {
+      //Collinear edges are allowed for open paths but in closed paths
+      //the default is to merge adjacent collinear edges into a single edge.
+      //However, if the PreserveCollinear property is enabled, only overlapping
+      //collinear edges (ie spikes) will be removed from closed paths.
+      if (E == eStart) eStart = E->Next;
+      E = RemoveEdge(E);
+      E = E->Prev;
+      eLoopStop = E;
+      continue;
+    }
+    E = E->Next;
+    if ((E == eLoopStop) || (!Closed && E->Next == eStart)) break;
+  }
+
+  if ((!Closed && (E == E->Next)) || (Closed && (E->Prev == E->Next)))
+  {
+    delete [] edges;
+    return false;
+  }
+
+  if (!Closed)
+  {
+    m_HasOpenPaths = true;
+    eStart->Prev->OutIdx = Skip;
+  }
+
+  //3. Do second stage of edge initialization ...
+  E = eStart;
+  do
+  {
+    InitEdge2(*E, PolyTyp);
+    E = E->Next;
+    if (IsFlat && E->Curr.Y != eStart->Curr.Y) IsFlat = false;
+  }
+  while (E != eStart);
+
+  //4. Finally, add edge bounds to LocalMinima list ...
+
+  //Totally flat paths must be handled differently when adding them
+  //to LocalMinima list to avoid endless loops etc ...
+  if (IsFlat)
+  {
+    if (Closed)
+    {
+      delete [] edges;
+      return false;
+    }
+    E->Prev->OutIdx = Skip;
+    MinimaList::value_type locMin;
+    locMin.Y = E->Bot.Y;
+    locMin.LeftBound = 0;
+    locMin.RightBound = E;
+    locMin.RightBound->Side = esRight;
+    locMin.RightBound->WindDelta = 0;
+    for (;;)
+    {
+      if (E->Bot.X != E->Prev->Top.X) ReverseHorizontal(*E);
+      if (E->Next->OutIdx == Skip) break;
+      E->NextInLML = E->Next;
+      E = E->Next;
+    }
+    m_MinimaList.push_back(locMin);
+    m_edges.push_back(edges);
+	  return true;
+  }
+
+  m_edges.push_back(edges);
+  bool leftBoundIsForward;
+  TEdge* EMin = 0;
+
+  //workaround to avoid an endless loop in the while loop below when
+  //open paths have matching start and end points ...
+  if (E->Prev->Bot == E->Prev->Top) E = E->Next;
+
+  for (;;)
+  {
+    E = FindNextLocMin(E);
+    if (E == EMin) break;
+    else if (!EMin) EMin = E;
+
+    //E and E.Prev now share a local minima (left aligned if horizontal).
+    //Compare their slopes to find which starts which bound ...
+    MinimaList::value_type locMin;
+    locMin.Y = E->Bot.Y;
+    if (E->Dx < E->Prev->Dx)
+    {
+      locMin.LeftBound = E->Prev;
+      locMin.RightBound = E;
+      leftBoundIsForward = false; //Q.nextInLML = Q.prev
+    } else
+    {
+      locMin.LeftBound = E;
+      locMin.RightBound = E->Prev;
+      leftBoundIsForward = true; //Q.nextInLML = Q.next
+    }
+
+    if (!Closed) locMin.LeftBound->WindDelta = 0;
+    else if (locMin.LeftBound->Next == locMin.RightBound)
+      locMin.LeftBound->WindDelta = -1;
+    else locMin.LeftBound->WindDelta = 1;
+    locMin.RightBound->WindDelta = -locMin.LeftBound->WindDelta;
+
+    E = ProcessBound(locMin.LeftBound, leftBoundIsForward);
+    if (E->OutIdx == Skip) E = ProcessBound(E, leftBoundIsForward);
+
+    TEdge* E2 = ProcessBound(locMin.RightBound, !leftBoundIsForward);
+    if (E2->OutIdx == Skip) E2 = ProcessBound(E2, !leftBoundIsForward);
+
+    if (locMin.LeftBound->OutIdx == Skip)
+      locMin.LeftBound = 0;
+    else if (locMin.RightBound->OutIdx == Skip)
+      locMin.RightBound = 0;
+    m_MinimaList.push_back(locMin);
+    if (!leftBoundIsForward) E = E2;
+  }
+  return true;
+}
+//------------------------------------------------------------------------------
+
+bool ClipperBase::AddPaths(const Paths &ppg, PolyType PolyTyp, bool Closed)
+{
+  bool result = false;
+  for (Paths::size_type i = 0; i < ppg.size(); ++i)
+    if (AddPath(ppg[i], PolyTyp, Closed)) result = true;
+  return result;
+}
+//------------------------------------------------------------------------------
+
+void ClipperBase::Clear()
+{
+  DisposeLocalMinimaList();
+  for (EdgeList::size_type i = 0; i < m_edges.size(); ++i)
+  {
+    TEdge* edges = m_edges[i];
+    delete [] edges;
+  }
+  m_edges.clear();
+  m_UseFullRange = false;
+  m_HasOpenPaths = false;
+}
+//------------------------------------------------------------------------------
+
+void ClipperBase::Reset()
+{
+  m_CurrentLM = m_MinimaList.begin();
+  if (m_CurrentLM == m_MinimaList.end()) return; //ie nothing to process
+  std::sort(m_MinimaList.begin(), m_MinimaList.end(), LocMinSorter());
+
+  m_Scanbeam = ScanbeamList(); //clears/resets priority_queue
+  //reset all edges ...
+  for (MinimaList::iterator lm = m_MinimaList.begin(); lm != m_MinimaList.end(); ++lm)
+  {
+    InsertScanbeam(lm->Y);
+    TEdge* e = lm->LeftBound;
+    if (e)
+    {
+      e->Curr = e->Bot;
+      e->Side = esLeft;
+      e->OutIdx = Unassigned;
+    }
+
+    e = lm->RightBound;
+    if (e)
+    {
+      e->Curr = e->Bot;
+      e->Side = esRight;
+      e->OutIdx = Unassigned;
+    }
+  }
+  m_ActiveEdges = 0;
+  m_CurrentLM = m_MinimaList.begin();
+}
+//------------------------------------------------------------------------------
+
+void ClipperBase::DisposeLocalMinimaList()
+{
+  m_MinimaList.clear();
+  m_CurrentLM = m_MinimaList.begin();
+}
+//------------------------------------------------------------------------------
+
+bool ClipperBase::PopLocalMinima(cInt Y, const LocalMinimum *&locMin)
+{
+  if (m_CurrentLM == m_MinimaList.end() || (*m_CurrentLM).Y != Y) return false;
+  locMin = &(*m_CurrentLM);
+  ++m_CurrentLM;
+  return true;
+}
+//------------------------------------------------------------------------------
+
+IntRect ClipperBase::GetBounds()
+{
+  IntRect result;
+  MinimaList::iterator lm = m_MinimaList.begin();
+  if (lm == m_MinimaList.end())
+  {
+    result.left = result.top = result.right = result.bottom = 0;
+    return result;
+  }
+  result.left = lm->LeftBound->Bot.X;
+  result.top = lm->LeftBound->Bot.Y;
+  result.right = lm->LeftBound->Bot.X;
+  result.bottom = lm->LeftBound->Bot.Y;
+  while (lm != m_MinimaList.end())
+  {
+    //todo - needs fixing for open paths
+    result.bottom = std::max(result.bottom, lm->LeftBound->Bot.Y);
+    TEdge* e = lm->LeftBound;
+    for (;;) {
+      TEdge* bottomE = e;
+      while (e->NextInLML)
+      {
+        if (e->Bot.X < result.left) result.left = e->Bot.X;
+        if (e->Bot.X > result.right) result.right = e->Bot.X;
+        e = e->NextInLML;
+      }
+      result.left = std::min(result.left, e->Bot.X);
+      result.right = std::max(result.right, e->Bot.X);
+      result.left = std::min(result.left, e->Top.X);
+      result.right = std::max(result.right, e->Top.X);
+      result.top = std::min(result.top, e->Top.Y);
+      if (bottomE == lm->LeftBound) e = lm->RightBound;
+      else break;
+    }
+    ++lm;
+  }
+  return result;
+}
+//------------------------------------------------------------------------------
+
+void ClipperBase::InsertScanbeam(const cInt Y)
+{
+  m_Scanbeam.push(Y);
+}
+//------------------------------------------------------------------------------
+
+bool ClipperBase::PopScanbeam(cInt &Y)
+{
+  if (m_Scanbeam.empty()) return false;
+  Y = m_Scanbeam.top();
+  m_Scanbeam.pop();
+  while (!m_Scanbeam.empty() && Y == m_Scanbeam.top()) { m_Scanbeam.pop(); } // Pop duplicates.
+  return true;
+}
+//------------------------------------------------------------------------------
+
+void ClipperBase::DisposeAllOutRecs(){
+  for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); ++i)
+    DisposeOutRec(i);
+  m_PolyOuts.clear();
+}
+//------------------------------------------------------------------------------
+
+void ClipperBase::DisposeOutRec(PolyOutList::size_type index)
+{
+  OutRec *outRec = m_PolyOuts[index];
+  if (outRec->Pts) DisposeOutPts(outRec->Pts);
+  delete outRec;
+  m_PolyOuts[index] = 0;
+}
+//------------------------------------------------------------------------------
+
+void ClipperBase::DeleteFromAEL(TEdge *e)
+{
+  TEdge* AelPrev = e->PrevInAEL;
+  TEdge* AelNext = e->NextInAEL;
+  if (!AelPrev &&  !AelNext && (e != m_ActiveEdges)) return; //already deleted
+  if (AelPrev) AelPrev->NextInAEL = AelNext;
+  else m_ActiveEdges = AelNext;
+  if (AelNext) AelNext->PrevInAEL = AelPrev;
+  e->NextInAEL = 0;
+  e->PrevInAEL = 0;
+}
+//------------------------------------------------------------------------------
+
+OutRec* ClipperBase::CreateOutRec()
+{
+  OutRec* result = new OutRec;
+  result->IsHole = false;
+  result->IsOpen = false;
+  result->FirstLeft = 0;
+  result->Pts = 0;
+  result->BottomPt = 0;
+  result->PolyNd = 0;
+  m_PolyOuts.push_back(result);
+  result->Idx = (int)m_PolyOuts.size() - 1;
+  return result;
+}
+//------------------------------------------------------------------------------
+
+void ClipperBase::SwapPositionsInAEL(TEdge *Edge1, TEdge *Edge2)
+{
+  //check that one or other edge hasn't already been removed from AEL ...
+  if (Edge1->NextInAEL == Edge1->PrevInAEL ||
+    Edge2->NextInAEL == Edge2->PrevInAEL) return;
+
+  if (Edge1->NextInAEL == Edge2)
+  {
+    TEdge* Next = Edge2->NextInAEL;
+    if (Next) Next->PrevInAEL = Edge1;
+    TEdge* Prev = Edge1->PrevInAEL;
+    if (Prev) Prev->NextInAEL = Edge2;
+    Edge2->PrevInAEL = Prev;
+    Edge2->NextInAEL = Edge1;
+    Edge1->PrevInAEL = Edge2;
+    Edge1->NextInAEL = Next;
+  }
+  else if (Edge2->NextInAEL == Edge1)
+  {
+    TEdge* Next = Edge1->NextInAEL;
+    if (Next) Next->PrevInAEL = Edge2;
+    TEdge* Prev = Edge2->PrevInAEL;
+    if (Prev) Prev->NextInAEL = Edge1;
+    Edge1->PrevInAEL = Prev;
+    Edge1->NextInAEL = Edge2;
+    Edge2->PrevInAEL = Edge1;
+    Edge2->NextInAEL = Next;
+  }
+  else
+  {
+    TEdge* Next = Edge1->NextInAEL;
+    TEdge* Prev = Edge1->PrevInAEL;
+    Edge1->NextInAEL = Edge2->NextInAEL;
+    if (Edge1->NextInAEL) Edge1->NextInAEL->PrevInAEL = Edge1;
+    Edge1->PrevInAEL = Edge2->PrevInAEL;
+    if (Edge1->PrevInAEL) Edge1->PrevInAEL->NextInAEL = Edge1;
+    Edge2->NextInAEL = Next;
+    if (Edge2->NextInAEL) Edge2->NextInAEL->PrevInAEL = Edge2;
+    Edge2->PrevInAEL = Prev;
+    if (Edge2->PrevInAEL) Edge2->PrevInAEL->NextInAEL = Edge2;
+  }
+
+  if (!Edge1->PrevInAEL) m_ActiveEdges = Edge1;
+  else if (!Edge2->PrevInAEL) m_ActiveEdges = Edge2;
+}
+//------------------------------------------------------------------------------
+
+void ClipperBase::UpdateEdgeIntoAEL(TEdge *&e)
+{
+  if (!e->NextInLML)
+    throw clipperException("UpdateEdgeIntoAEL: invalid call");
+
+  e->NextInLML->OutIdx = e->OutIdx;
+  TEdge* AelPrev = e->PrevInAEL;
+  TEdge* AelNext = e->NextInAEL;
+  if (AelPrev) AelPrev->NextInAEL = e->NextInLML;
+  else m_ActiveEdges = e->NextInLML;
+  if (AelNext) AelNext->PrevInAEL = e->NextInLML;
+  e->NextInLML->Side = e->Side;
+  e->NextInLML->WindDelta = e->WindDelta;
+  e->NextInLML->WindCnt = e->WindCnt;
+  e->NextInLML->WindCnt2 = e->WindCnt2;
+  e = e->NextInLML;
+  e->Curr = e->Bot;
+  e->PrevInAEL = AelPrev;
+  e->NextInAEL = AelNext;
+  if (!IsHorizontal(*e)) InsertScanbeam(e->Top.Y);
+}
+//------------------------------------------------------------------------------
+
+bool ClipperBase::LocalMinimaPending()
+{
+  return (m_CurrentLM != m_MinimaList.end());
+}
+
+//------------------------------------------------------------------------------
+// TClipper methods ...
+//------------------------------------------------------------------------------
+
+Clipper::Clipper(int initOptions) : ClipperBase() //constructor
+{
+  m_ExecuteLocked = false;
+  m_UseFullRange = false;
+  m_ReverseOutput = ((initOptions & ioReverseSolution) != 0);
+  m_StrictSimple = ((initOptions & ioStrictlySimple) != 0);
+  m_PreserveCollinear = ((initOptions & ioPreserveCollinear) != 0);
+  m_HasOpenPaths = false;
+#ifdef use_xyz
+  m_ZFill = 0;
+#endif
+}
+//------------------------------------------------------------------------------
+
+#ifdef use_xyz
+void Clipper::ZFillFunction(ZFillCallback zFillFunc)
+{
+  m_ZFill = zFillFunc;
+}
+//------------------------------------------------------------------------------
+#endif
+
+bool Clipper::Execute(ClipType clipType, Paths &solution, PolyFillType fillType)
+{
+    return Execute(clipType, solution, fillType, fillType);
+}
+//------------------------------------------------------------------------------
+
+bool Clipper::Execute(ClipType clipType, PolyTree &polytree, PolyFillType fillType)
+{
+    return Execute(clipType, polytree, fillType, fillType);
+}
+//------------------------------------------------------------------------------
+
+bool Clipper::Execute(ClipType clipType, Paths &solution,
+    PolyFillType subjFillType, PolyFillType clipFillType)
+{
+  if( m_ExecuteLocked ) return false;
+  if (m_HasOpenPaths)
+    throw clipperException("Error: PolyTree struct is needed for open path clipping.");
+  m_ExecuteLocked = true;
+  solution.resize(0);
+  m_SubjFillType = subjFillType;
+  m_ClipFillType = clipFillType;
+  m_ClipType = clipType;
+  m_UsingPolyTree = false;
+  bool succeeded = ExecuteInternal();
+  if (succeeded) BuildResult(solution);
+  DisposeAllOutRecs();
+  m_ExecuteLocked = false;
+  return succeeded;
+}
+//------------------------------------------------------------------------------
+
+bool Clipper::Execute(ClipType clipType, PolyTree& polytree,
+    PolyFillType subjFillType, PolyFillType clipFillType)
+{
+  if( m_ExecuteLocked ) return false;
+  m_ExecuteLocked = true;
+  m_SubjFillType = subjFillType;
+  m_ClipFillType = clipFillType;
+  m_ClipType = clipType;
+  m_UsingPolyTree = true;
+  bool succeeded = ExecuteInternal();
+  if (succeeded) BuildResult2(polytree);
+  DisposeAllOutRecs();
+  m_ExecuteLocked = false;
+  return succeeded;
+}
+//------------------------------------------------------------------------------
+
+void Clipper::FixHoleLinkage(OutRec &outrec)
+{
+  //skip OutRecs that (a) contain outermost polygons or
+  //(b) already have the correct owner/child linkage ...
+  if (!outrec.FirstLeft ||
+      (outrec.IsHole != outrec.FirstLeft->IsHole &&
+      outrec.FirstLeft->Pts)) return;
+
+  OutRec* orfl = outrec.FirstLeft;
+  while (orfl && ((orfl->IsHole == outrec.IsHole) || !orfl->Pts))
+      orfl = orfl->FirstLeft;
+  outrec.FirstLeft = orfl;
+}
+//------------------------------------------------------------------------------
+
+bool Clipper::ExecuteInternal()
+{
+  bool succeeded = true;
+  try {
+    Reset();
+    m_Maxima = MaximaList();
+    m_SortedEdges = 0;
+
+    succeeded = true;
+    cInt botY, topY;
+    if (!PopScanbeam(botY)) return false;
+    InsertLocalMinimaIntoAEL(botY);
+    while (PopScanbeam(topY) || LocalMinimaPending())
+    {
+      ProcessHorizontals();
+	    ClearGhostJoins();
+      if (!ProcessIntersections(topY))
+      {
+        succeeded = false;
+        break;
+      }
+      ProcessEdgesAtTopOfScanbeam(topY);
+      botY = topY;
+      InsertLocalMinimaIntoAEL(botY);
+    }
+  }
+  catch(...)
+  {
+    succeeded = false;
+  }
+
+  if (succeeded)
+  {
+    //fix orientations ...
+    for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); ++i)
+    {
+      OutRec *outRec = m_PolyOuts[i];
+      if (!outRec->Pts || outRec->IsOpen) continue;
+      if ((outRec->IsHole ^ m_ReverseOutput) == (Area(*outRec) > 0))
+        ReversePolyPtLinks(outRec->Pts);
+    }
+
+    if (!m_Joins.empty()) JoinCommonEdges();
+
+    //unfortunately FixupOutPolygon() must be done after JoinCommonEdges()
+    for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); ++i)
+    {
+      OutRec *outRec = m_PolyOuts[i];
+      if (!outRec->Pts) continue;
+      if (outRec->IsOpen)
+        FixupOutPolyline(*outRec);
+      else
+        FixupOutPolygon(*outRec);
+    }
+
+    if (m_StrictSimple) DoSimplePolygons();
+  }
+
+  ClearJoins();
+  ClearGhostJoins();
+  return succeeded;
+}
+//------------------------------------------------------------------------------
+
+void Clipper::SetWindingCount(TEdge &edge)
+{
+  TEdge *e = edge.PrevInAEL;
+  //find the edge of the same polytype that immediately preceeds 'edge' in AEL
+  while (e  && ((e->PolyTyp != edge.PolyTyp) || (e->WindDelta == 0))) e = e->PrevInAEL;
+  if (!e)
+  {
+    if (edge.WindDelta == 0)
+    {
+      PolyFillType pft = (edge.PolyTyp == ptSubject ? m_SubjFillType : m_ClipFillType);
+      edge.WindCnt = (pft == pftNegative ? -1 : 1);
+    }
+    else
+      edge.WindCnt = edge.WindDelta;
+    edge.WindCnt2 = 0;
+    e = m_ActiveEdges; //ie get ready to calc WindCnt2
+  }
+  else if (edge.WindDelta == 0 && m_ClipType != ctUnion)
+  {
+    edge.WindCnt = 1;
+    edge.WindCnt2 = e->WindCnt2;
+    e = e->NextInAEL; //ie get ready to calc WindCnt2
+  }
+  else if (IsEvenOddFillType(edge))
+  {
+    //EvenOdd filling ...
+    if (edge.WindDelta == 0)
+    {
+      //are we inside a subj polygon ...
+      bool Inside = true;
+      TEdge *e2 = e->PrevInAEL;
+      while (e2)
+      {
+        if (e2->PolyTyp == e->PolyTyp && e2->WindDelta != 0)
+          Inside = !Inside;
+        e2 = e2->PrevInAEL;
+      }
+      edge.WindCnt = (Inside ? 0 : 1);
+    }
+    else
+    {
+      edge.WindCnt = edge.WindDelta;
+    }
+    edge.WindCnt2 = e->WindCnt2;
+    e = e->NextInAEL; //ie get ready to calc WindCnt2
+  }
+  else
+  {
+    //nonZero, Positive or Negative filling ...
+    if (e->WindCnt * e->WindDelta < 0)
+    {
+      //prev edge is 'decreasing' WindCount (WC) toward zero
+      //so we're outside the previous polygon ...
+      if (Abs(e->WindCnt) > 1)
+      {
+        //outside prev poly but still inside another.
+        //when reversing direction of prev poly use the same WC
+        if (e->WindDelta * edge.WindDelta < 0) edge.WindCnt = e->WindCnt;
+        //otherwise continue to 'decrease' WC ...
+        else edge.WindCnt = e->WindCnt + edge.WindDelta;
+      }
+      else
+        //now outside all polys of same polytype so set own WC ...
+        edge.WindCnt = (edge.WindDelta == 0 ? 1 : edge.WindDelta);
+    } else
+    {
+      //prev edge is 'increasing' WindCount (WC) away from zero
+      //so we're inside the previous polygon ...
+      if (edge.WindDelta == 0)
+        edge.WindCnt = (e->WindCnt < 0 ? e->WindCnt - 1 : e->WindCnt + 1);
+      //if wind direction is reversing prev then use same WC
+      else if (e->WindDelta * edge.WindDelta < 0) edge.WindCnt = e->WindCnt;
+      //otherwise add to WC ...
+      else edge.WindCnt = e->WindCnt + edge.WindDelta;
+    }
+    edge.WindCnt2 = e->WindCnt2;
+    e = e->NextInAEL; //ie get ready to calc WindCnt2
+  }
+
+  //update WindCnt2 ...
+  if (IsEvenOddAltFillType(edge))
+  {
+    //EvenOdd filling ...
+    while (e != &edge)
+    {
+      if (e->WindDelta != 0)
+        edge.WindCnt2 = (edge.WindCnt2 == 0 ? 1 : 0);
+      e = e->NextInAEL;
+    }
+  } else
+  {
+    //nonZero, Positive or Negative filling ...
+    while ( e != &edge )
+    {
+      edge.WindCnt2 += e->WindDelta;
+      e = e->NextInAEL;
+    }
+  }
+}
+//------------------------------------------------------------------------------
+
+bool Clipper::IsEvenOddFillType(const TEdge& edge) const
+{
+  if (edge.PolyTyp == ptSubject)
+    return m_SubjFillType == pftEvenOdd; else
+    return m_ClipFillType == pftEvenOdd;
+}
+//------------------------------------------------------------------------------
+
+bool Clipper::IsEvenOddAltFillType(const TEdge& edge) const
+{
+  if (edge.PolyTyp == ptSubject)
+    return m_ClipFillType == pftEvenOdd; else
+    return m_SubjFillType == pftEvenOdd;
+}
+//------------------------------------------------------------------------------
+
+bool Clipper::IsContributing(const TEdge& edge) const
+{
+  PolyFillType pft, pft2;
+  if (edge.PolyTyp == ptSubject)
+  {
+    pft = m_SubjFillType;
+    pft2 = m_ClipFillType;
+  } else
+  {
+    pft = m_ClipFillType;
+    pft2 = m_SubjFillType;
+  }
+
+  switch(pft)
+  {
+    case pftEvenOdd:
+      //return false if a subj line has been flagged as inside a subj polygon
+      if (edge.WindDelta == 0 && edge.WindCnt != 1) return false;
+      break;
+    case pftNonZero:
+      if (Abs(edge.WindCnt) != 1) return false;
+      break;
+    case pftPositive:
+      if (edge.WindCnt != 1) return false;
+      break;
+    default: //pftNegative
+      if (edge.WindCnt != -1) return false;
+  }
+
+  switch(m_ClipType)
+  {
+    case ctIntersection:
+      switch(pft2)
+      {
+        case pftEvenOdd:
+        case pftNonZero:
+          return (edge.WindCnt2 != 0);
+        case pftPositive:
+          return (edge.WindCnt2 > 0);
+        default:
+          return (edge.WindCnt2 < 0);
+      }
+      break;
+    case ctUnion:
+      switch(pft2)
+      {
+        case pftEvenOdd:
+        case pftNonZero:
+          return (edge.WindCnt2 == 0);
+        case pftPositive:
+          return (edge.WindCnt2 <= 0);
+        default:
+          return (edge.WindCnt2 >= 0);
+      }
+      break;
+    case ctDifference:
+      if (edge.PolyTyp == ptSubject)
+        switch(pft2)
+        {
+          case pftEvenOdd:
+          case pftNonZero:
+            return (edge.WindCnt2 == 0);
+          case pftPositive:
+            return (edge.WindCnt2 <= 0);
+          default:
+            return (edge.WindCnt2 >= 0);
+        }
+      else
+        switch(pft2)
+        {
+          case pftEvenOdd:
+          case pftNonZero:
+            return (edge.WindCnt2 != 0);
+          case pftPositive:
+            return (edge.WindCnt2 > 0);
+          default:
+            return (edge.WindCnt2 < 0);
+        }
+      break;
+    case ctXor:
+      if (edge.WindDelta == 0) //XOr always contributing unless open
+        switch(pft2)
+        {
+          case pftEvenOdd:
+          case pftNonZero:
+            return (edge.WindCnt2 == 0);
+          case pftPositive:
+            return (edge.WindCnt2 <= 0);
+          default:
+            return (edge.WindCnt2 >= 0);
+        }
+      else
+        return true;
+      break;
+    default:
+      return true;
+  }
+}
+//------------------------------------------------------------------------------
+
+OutPt* Clipper::AddLocalMinPoly(TEdge *e1, TEdge *e2, const IntPoint &Pt)
+{
+  OutPt* result;
+  TEdge *e, *prevE;
+  if (IsHorizontal(*e2) || ( e1->Dx > e2->Dx ))
+  {
+    result = AddOutPt(e1, Pt);
+    e2->OutIdx = e1->OutIdx;
+    e1->Side = esLeft;
+    e2->Side = esRight;
+    e = e1;
+    if (e->PrevInAEL == e2)
+      prevE = e2->PrevInAEL;
+    else
+      prevE = e->PrevInAEL;
+  } else
+  {
+    result = AddOutPt(e2, Pt);
+    e1->OutIdx = e2->OutIdx;
+    e1->Side = esRight;
+    e2->Side = esLeft;
+    e = e2;
+    if (e->PrevInAEL == e1)
+        prevE = e1->PrevInAEL;
+    else
+        prevE = e->PrevInAEL;
+  }
+
+  if (prevE && prevE->OutIdx >= 0 && prevE->Top.Y < Pt.Y && e->Top.Y < Pt.Y)
+  {
+    cInt xPrev = TopX(*prevE, Pt.Y);
+    cInt xE = TopX(*e, Pt.Y);
+    if (xPrev == xE && (e->WindDelta != 0) && (prevE->WindDelta != 0) &&
+      SlopesEqual(IntPoint(xPrev, Pt.Y), prevE->Top, IntPoint(xE, Pt.Y), e->Top, m_UseFullRange))
+    {
+      OutPt* outPt = AddOutPt(prevE, Pt);
+      AddJoin(result, outPt, e->Top);
+    }
+  }
+  return result;
+}
+//------------------------------------------------------------------------------
+
+void Clipper::AddLocalMaxPoly(TEdge *e1, TEdge *e2, const IntPoint &Pt)
+{
+  AddOutPt( e1, Pt );
+  if (e2->WindDelta == 0) AddOutPt(e2, Pt);
+  if( e1->OutIdx == e2->OutIdx )
+  {
+    e1->OutIdx = Unassigned;
+    e2->OutIdx = Unassigned;
+  }
+  else if (e1->OutIdx < e2->OutIdx)
+    AppendPolygon(e1, e2);
+  else
+    AppendPolygon(e2, e1);
+}
+//------------------------------------------------------------------------------
+
+void Clipper::AddEdgeToSEL(TEdge *edge)
+{
+  //SEL pointers in PEdge are reused to build a list of horizontal edges.
+  //However, we don't need to worry about order with horizontal edge processing.
+  if( !m_SortedEdges )
+  {
+    m_SortedEdges = edge;
+    edge->PrevInSEL = 0;
+    edge->NextInSEL = 0;
+  }
+  else
+  {
+    edge->NextInSEL = m_SortedEdges;
+    edge->PrevInSEL = 0;
+    m_SortedEdges->PrevInSEL = edge;
+    m_SortedEdges = edge;
+  }
+}
+//------------------------------------------------------------------------------
+
+bool Clipper::PopEdgeFromSEL(TEdge *&edge)
+{
+  if (!m_SortedEdges) return false;
+  edge = m_SortedEdges;
+  DeleteFromSEL(m_SortedEdges);
+  return true;
+}
+//------------------------------------------------------------------------------
+
+void Clipper::CopyAELToSEL()
+{
+  TEdge* e = m_ActiveEdges;
+  m_SortedEdges = e;
+  while ( e )
+  {
+    e->PrevInSEL = e->PrevInAEL;
+    e->NextInSEL = e->NextInAEL;
+    e = e->NextInAEL;
+  }
+}
+//------------------------------------------------------------------------------
+
+void Clipper::AddJoin(OutPt *op1, OutPt *op2, const IntPoint OffPt)
+{
+  Join* j = new Join;
+  j->OutPt1 = op1;
+  j->OutPt2 = op2;
+  j->OffPt = OffPt;
+  m_Joins.push_back(j);
+}
+//------------------------------------------------------------------------------
+
+void Clipper::ClearJoins()
+{
+  for (JoinList::size_type i = 0; i < m_Joins.size(); i++)
+    delete m_Joins[i];
+  m_Joins.resize(0);
+}
+//------------------------------------------------------------------------------
+
+void Clipper::ClearGhostJoins()
+{
+  for (JoinList::size_type i = 0; i < m_GhostJoins.size(); i++)
+    delete m_GhostJoins[i];
+  m_GhostJoins.resize(0);
+}
+//------------------------------------------------------------------------------
+
+void Clipper::AddGhostJoin(OutPt *op, const IntPoint OffPt)
+{
+  Join* j = new Join;
+  j->OutPt1 = op;
+  j->OutPt2 = 0;
+  j->OffPt = OffPt;
+  m_GhostJoins.push_back(j);
+}
+//------------------------------------------------------------------------------
+
+void Clipper::InsertLocalMinimaIntoAEL(const cInt botY)
+{
+  const LocalMinimum *lm;
+  while (PopLocalMinima(botY, lm))
+  {
+    TEdge* lb = lm->LeftBound;
+    TEdge* rb = lm->RightBound;
+
+    OutPt *Op1 = 0;
+    if (!lb)
+    {
+      //nb: don't insert LB into either AEL or SEL
+      InsertEdgeIntoAEL(rb, 0);
+      SetWindingCount(*rb);
+      if (IsContributing(*rb))
+        Op1 = AddOutPt(rb, rb->Bot);
+    }
+    else if (!rb)
+    {
+      InsertEdgeIntoAEL(lb, 0);
+      SetWindingCount(*lb);
+      if (IsContributing(*lb))
+        Op1 = AddOutPt(lb, lb->Bot);
+      InsertScanbeam(lb->Top.Y);
+    }
+    else
+    {
+      InsertEdgeIntoAEL(lb, 0);
+      InsertEdgeIntoAEL(rb, lb);
+      SetWindingCount( *lb );
+      rb->WindCnt = lb->WindCnt;
+      rb->WindCnt2 = lb->WindCnt2;
+      if (IsContributing(*lb))
+        Op1 = AddLocalMinPoly(lb, rb, lb->Bot);
+      InsertScanbeam(lb->Top.Y);
+    }
+
+     if (rb)
+     {
+		 if (IsHorizontal(*rb))
+		 {
+			 AddEdgeToSEL(rb);
+			 if (rb->NextInLML)
+				 InsertScanbeam(rb->NextInLML->Top.Y);
+		 }
+		 else InsertScanbeam( rb->Top.Y );
+     }
+
+    if (!lb || !rb) continue;
+
+    //if any output polygons share an edge, they'll need joining later ...
+    if (Op1 && IsHorizontal(*rb) &&
+      m_GhostJoins.size() > 0 && (rb->WindDelta != 0))
+    {
+      for (JoinList::size_type i = 0; i < m_GhostJoins.size(); ++i)
+      {
+        Join* jr = m_GhostJoins[i];
+        //if the horizontal Rb and a 'ghost' horizontal overlap, then convert
+        //the 'ghost' join to a real join ready for later ...
+        if (HorzSegmentsOverlap(jr->OutPt1->Pt.X, jr->OffPt.X, rb->Bot.X, rb->Top.X))
+          AddJoin(jr->OutPt1, Op1, jr->OffPt);
+      }
+    }
+
+    if (lb->OutIdx >= 0 && lb->PrevInAEL &&
+      lb->PrevInAEL->Curr.X == lb->Bot.X &&
+      lb->PrevInAEL->OutIdx >= 0 &&
+      SlopesEqual(lb->PrevInAEL->Bot, lb->PrevInAEL->Top, lb->Curr, lb->Top, m_UseFullRange) &&
+      (lb->WindDelta != 0) && (lb->PrevInAEL->WindDelta != 0))
+    {
+        OutPt *Op2 = AddOutPt(lb->PrevInAEL, lb->Bot);
+        AddJoin(Op1, Op2, lb->Top);
+    }
+
+    if(lb->NextInAEL != rb)
+    {
+
+      if (rb->OutIdx >= 0 && rb->PrevInAEL->OutIdx >= 0 &&
+        SlopesEqual(rb->PrevInAEL->Curr, rb->PrevInAEL->Top, rb->Curr, rb->Top, m_UseFullRange) &&
+        (rb->WindDelta != 0) && (rb->PrevInAEL->WindDelta != 0))
+      {
+          OutPt *Op2 = AddOutPt(rb->PrevInAEL, rb->Bot);
+          AddJoin(Op1, Op2, rb->Top);
+      }
+
+      TEdge* e = lb->NextInAEL;
+      if (e)
+      {
+        while( e != rb )
+        {
+          //nb: For calculating winding counts etc, IntersectEdges() assumes
+          //that param1 will be to the Right of param2 ABOVE the intersection ...
+          IntersectEdges(rb , e , lb->Curr); //order important here
+          e = e->NextInAEL;
+        }
+      }
+    }
+
+  }
+}
+//------------------------------------------------------------------------------
+
+void Clipper::DeleteFromSEL(TEdge *e)
+{
+  TEdge* SelPrev = e->PrevInSEL;
+  TEdge* SelNext = e->NextInSEL;
+  if( !SelPrev &&  !SelNext && (e != m_SortedEdges) ) return; //already deleted
+  if( SelPrev ) SelPrev->NextInSEL = SelNext;
+  else m_SortedEdges = SelNext;
+  if( SelNext ) SelNext->PrevInSEL = SelPrev;
+  e->NextInSEL = 0;
+  e->PrevInSEL = 0;
+}
+//------------------------------------------------------------------------------
+
+#ifdef use_xyz
+void Clipper::SetZ(IntPoint& pt, TEdge& e1, TEdge& e2)
+{
+  if (pt.Z != 0 || !m_ZFill) return;
+  else if (pt == e1.Bot) pt.Z = e1.Bot.Z;
+  else if (pt == e1.Top) pt.Z = e1.Top.Z;
+  else if (pt == e2.Bot) pt.Z = e2.Bot.Z;
+  else if (pt == e2.Top) pt.Z = e2.Top.Z;
+  else (*m_ZFill)(e1.Bot, e1.Top, e2.Bot, e2.Top, pt);
+}
+//------------------------------------------------------------------------------
+#endif
+
+void Clipper::IntersectEdges(TEdge *e1, TEdge *e2, IntPoint &Pt)
+{
+  bool e1Contributing = ( e1->OutIdx >= 0 );
+  bool e2Contributing = ( e2->OutIdx >= 0 );
+
+#ifdef use_xyz
+        SetZ(Pt, *e1, *e2);
+#endif
+
+#ifdef use_lines
+  //if either edge is on an OPEN path ...
+  if (e1->WindDelta == 0 || e2->WindDelta == 0)
+  {
+    //ignore subject-subject open path intersections UNLESS they
+    //are both open paths, AND they are both 'contributing maximas' ...
+	if (e1->WindDelta == 0 && e2->WindDelta == 0) return;
+
+    //if intersecting a subj line with a subj poly ...
+    else if (e1->PolyTyp == e2->PolyTyp &&
+      e1->WindDelta != e2->WindDelta && m_ClipType == ctUnion)
+    {
+      if (e1->WindDelta == 0)
+      {
+        if (e2Contributing)
+        {
+          AddOutPt(e1, Pt);
+          if (e1Contributing) e1->OutIdx = Unassigned;
+        }
+      }
+      else
+      {
+        if (e1Contributing)
+        {
+          AddOutPt(e2, Pt);
+          if (e2Contributing) e2->OutIdx = Unassigned;
+        }
+      }
+    }
+    else if (e1->PolyTyp != e2->PolyTyp)
+    {
+      //toggle subj open path OutIdx on/off when Abs(clip.WndCnt) == 1 ...
+      if ((e1->WindDelta == 0) && abs(e2->WindCnt) == 1 &&
+        (m_ClipType != ctUnion || e2->WindCnt2 == 0))
+      {
+        AddOutPt(e1, Pt);
+        if (e1Contributing) e1->OutIdx = Unassigned;
+      }
+      else if ((e2->WindDelta == 0) && (abs(e1->WindCnt) == 1) &&
+        (m_ClipType != ctUnion || e1->WindCnt2 == 0))
+      {
+        AddOutPt(e2, Pt);
+        if (e2Contributing) e2->OutIdx = Unassigned;
+      }
+    }
+    return;
+  }
+#endif
+
+  //update winding counts...
+  //assumes that e1 will be to the Right of e2 ABOVE the intersection
+  if ( e1->PolyTyp == e2->PolyTyp )
+  {
+    if ( IsEvenOddFillType( *e1) )
+    {
+      int oldE1WindCnt = e1->WindCnt;
+      e1->WindCnt = e2->WindCnt;
+      e2->WindCnt = oldE1WindCnt;
+    } else
+    {
+      if (e1->WindCnt + e2->WindDelta == 0 ) e1->WindCnt = -e1->WindCnt;
+      else e1->WindCnt += e2->WindDelta;
+      if ( e2->WindCnt - e1->WindDelta == 0 ) e2->WindCnt = -e2->WindCnt;
+      else e2->WindCnt -= e1->WindDelta;
+    }
+  } else
+  {
+    if (!IsEvenOddFillType(*e2)) e1->WindCnt2 += e2->WindDelta;
+    else e1->WindCnt2 = ( e1->WindCnt2 == 0 ) ? 1 : 0;
+    if (!IsEvenOddFillType(*e1)) e2->WindCnt2 -= e1->WindDelta;
+    else e2->WindCnt2 = ( e2->WindCnt2 == 0 ) ? 1 : 0;
+  }
+
+  PolyFillType e1FillType, e2FillType, e1FillType2, e2FillType2;
+  if (e1->PolyTyp == ptSubject)
+  {
+    e1FillType = m_SubjFillType;
+    e1FillType2 = m_ClipFillType;
+  } else
+  {
+    e1FillType = m_ClipFillType;
+    e1FillType2 = m_SubjFillType;
+  }
+  if (e2->PolyTyp == ptSubject)
+  {
+    e2FillType = m_SubjFillType;
+    e2FillType2 = m_ClipFillType;
+  } else
+  {
+    e2FillType = m_ClipFillType;
+    e2FillType2 = m_SubjFillType;
+  }
+
+  cInt e1Wc, e2Wc;
+  switch (e1FillType)
+  {
+    case pftPositive: e1Wc = e1->WindCnt; break;
+    case pftNegative: e1Wc = -e1->WindCnt; break;
+    default: e1Wc = Abs(e1->WindCnt);
+  }
+  switch(e2FillType)
+  {
+    case pftPositive: e2Wc = e2->WindCnt; break;
+    case pftNegative: e2Wc = -e2->WindCnt; break;
+    default: e2Wc = Abs(e2->WindCnt);
+  }
+
+  if ( e1Contributing && e2Contributing )
+  {
+    if ((e1Wc != 0 && e1Wc != 1) || (e2Wc != 0 && e2Wc != 1) ||
+      (e1->PolyTyp != e2->PolyTyp && m_ClipType != ctXor) )
+    {
+      AddLocalMaxPoly(e1, e2, Pt);
+    }
+    else
+    {
+      AddOutPt(e1, Pt);
+      AddOutPt(e2, Pt);
+      SwapSides( *e1 , *e2 );
+      SwapPolyIndexes( *e1 , *e2 );
+    }
+  }
+  else if ( e1Contributing )
+  {
+    if (e2Wc == 0 || e2Wc == 1)
+    {
+      AddOutPt(e1, Pt);
+      SwapSides(*e1, *e2);
+      SwapPolyIndexes(*e1, *e2);
+    }
+  }
+  else if ( e2Contributing )
+  {
+    if (e1Wc == 0 || e1Wc == 1)
+    {
+      AddOutPt(e2, Pt);
+      SwapSides(*e1, *e2);
+      SwapPolyIndexes(*e1, *e2);
+    }
+  }
+  else if ( (e1Wc == 0 || e1Wc == 1) && (e2Wc == 0 || e2Wc == 1))
+  {
+    //neither edge is currently contributing ...
+
+    cInt e1Wc2, e2Wc2;
+    switch (e1FillType2)
+    {
+      case pftPositive: e1Wc2 = e1->WindCnt2; break;
+      case pftNegative : e1Wc2 = -e1->WindCnt2; break;
+      default: e1Wc2 = Abs(e1->WindCnt2);
+    }
+    switch (e2FillType2)
+    {
+      case pftPositive: e2Wc2 = e2->WindCnt2; break;
+      case pftNegative: e2Wc2 = -e2->WindCnt2; break;
+      default: e2Wc2 = Abs(e2->WindCnt2);
+    }
+
+    if (e1->PolyTyp != e2->PolyTyp)
+    {
+      AddLocalMinPoly(e1, e2, Pt);
+    }
+    else if (e1Wc == 1 && e2Wc == 1)
+      switch( m_ClipType ) {
+        case ctIntersection:
+          if (e1Wc2 > 0 && e2Wc2 > 0)
+            AddLocalMinPoly(e1, e2, Pt);
+          break;
+        case ctUnion:
+          if ( e1Wc2 <= 0 && e2Wc2 <= 0 )
+            AddLocalMinPoly(e1, e2, Pt);
+          break;
+        case ctDifference:
+          if (((e1->PolyTyp == ptClip) && (e1Wc2 > 0) && (e2Wc2 > 0)) ||
+              ((e1->PolyTyp == ptSubject) && (e1Wc2 <= 0) && (e2Wc2 <= 0)))
+                AddLocalMinPoly(e1, e2, Pt);
+          break;
+        case ctXor:
+          AddLocalMinPoly(e1, e2, Pt);
+      }
+    else
+      SwapSides( *e1, *e2 );
+  }
+}
+//------------------------------------------------------------------------------
+
+void Clipper::SetHoleState(TEdge *e, OutRec *outrec)
+{
+  TEdge *e2 = e->PrevInAEL;
+  TEdge *eTmp = 0;
+  while (e2)
+  {
+    if (e2->OutIdx >= 0 && e2->WindDelta != 0)
+    {
+      if (!eTmp) eTmp = e2;
+      else if (eTmp->OutIdx == e2->OutIdx) eTmp = 0;
+    }
+    e2 = e2->PrevInAEL;
+  }
+  if (!eTmp)
+  {
+    outrec->FirstLeft = 0;
+    outrec->IsHole = false;
+  }
+  else
+  {
+    outrec->FirstLeft = m_PolyOuts[eTmp->OutIdx];
+    outrec->IsHole = !outrec->FirstLeft->IsHole;
+  }
+}
+//------------------------------------------------------------------------------
+
+OutRec* GetLowermostRec(OutRec *outRec1, OutRec *outRec2)
+{
+  //work out which polygon fragment has the correct hole state ...
+  if (!outRec1->BottomPt)
+    outRec1->BottomPt = GetBottomPt(outRec1->Pts);
+  if (!outRec2->BottomPt)
+    outRec2->BottomPt = GetBottomPt(outRec2->Pts);
+  OutPt *OutPt1 = outRec1->BottomPt;
+  OutPt *OutPt2 = outRec2->BottomPt;
+  if (OutPt1->Pt.Y > OutPt2->Pt.Y) return outRec1;
+  else if (OutPt1->Pt.Y < OutPt2->Pt.Y) return outRec2;
+  else if (OutPt1->Pt.X < OutPt2->Pt.X) return outRec1;
+  else if (OutPt1->Pt.X > OutPt2->Pt.X) return outRec2;
+  else if (OutPt1->Next == OutPt1) return outRec2;
+  else if (OutPt2->Next == OutPt2) return outRec1;
+  else if (FirstIsBottomPt(OutPt1, OutPt2)) return outRec1;
+  else return outRec2;
+}
+//------------------------------------------------------------------------------
+
+bool OutRec1RightOfOutRec2(OutRec* outRec1, OutRec* outRec2)
+{
+  do
+  {
+    outRec1 = outRec1->FirstLeft;
+    if (outRec1 == outRec2) return true;
+  } while (outRec1);
+  return false;
+}
+//------------------------------------------------------------------------------
+
+OutRec* Clipper::GetOutRec(int Idx)
+{
+  OutRec* outrec = m_PolyOuts[Idx];
+  while (outrec != m_PolyOuts[outrec->Idx])
+    outrec = m_PolyOuts[outrec->Idx];
+  return outrec;
+}
+//------------------------------------------------------------------------------
+
+void Clipper::AppendPolygon(TEdge *e1, TEdge *e2)
+{
+  //get the start and ends of both output polygons ...
+  OutRec *outRec1 = m_PolyOuts[e1->OutIdx];
+  OutRec *outRec2 = m_PolyOuts[e2->OutIdx];
+
+  OutRec *holeStateRec;
+  if (OutRec1RightOfOutRec2(outRec1, outRec2))
+    holeStateRec = outRec2;
+  else if (OutRec1RightOfOutRec2(outRec2, outRec1))
+    holeStateRec = outRec1;
+  else
+    holeStateRec = GetLowermostRec(outRec1, outRec2);
+
+  //get the start and ends of both output polygons and
+  //join e2 poly onto e1 poly and delete pointers to e2 ...
+
+  OutPt* p1_lft = outRec1->Pts;
+  OutPt* p1_rt = p1_lft->Prev;
+  OutPt* p2_lft = outRec2->Pts;
+  OutPt* p2_rt = p2_lft->Prev;
+
+  //join e2 poly onto e1 poly and delete pointers to e2 ...
+  if(  e1->Side == esLeft )
+  {
+    if(  e2->Side == esLeft )
+    {
+      //z y x a b c
+      ReversePolyPtLinks(p2_lft);
+      p2_lft->Next = p1_lft;
+      p1_lft->Prev = p2_lft;
+      p1_rt->Next = p2_rt;
+      p2_rt->Prev = p1_rt;
+      outRec1->Pts = p2_rt;
+    } else
+    {
+      //x y z a b c
+      p2_rt->Next = p1_lft;
+      p1_lft->Prev = p2_rt;
+      p2_lft->Prev = p1_rt;
+      p1_rt->Next = p2_lft;
+      outRec1->Pts = p2_lft;
+    }
+  } else
+  {
+    if(  e2->Side == esRight )
+    {
+      //a b c z y x
+      ReversePolyPtLinks(p2_lft);
+      p1_rt->Next = p2_rt;
+      p2_rt->Prev = p1_rt;
+      p2_lft->Next = p1_lft;
+      p1_lft->Prev = p2_lft;
+    } else
+    {
+      //a b c x y z
+      p1_rt->Next = p2_lft;
+      p2_lft->Prev = p1_rt;
+      p1_lft->Prev = p2_rt;
+      p2_rt->Next = p1_lft;
+    }
+  }
+
+  outRec1->BottomPt = 0;
+  if (holeStateRec == outRec2)
+  {
+    if (outRec2->FirstLeft != outRec1)
+      outRec1->FirstLeft = outRec2->FirstLeft;
+    outRec1->IsHole = outRec2->IsHole;
+  }
+  outRec2->Pts = 0;
+  outRec2->BottomPt = 0;
+  outRec2->FirstLeft = outRec1;
+
+  int OKIdx = e1->OutIdx;
+  int ObsoleteIdx = e2->OutIdx;
+
+  e1->OutIdx = Unassigned; //nb: safe because we only get here via AddLocalMaxPoly
+  e2->OutIdx = Unassigned;
+
+  TEdge* e = m_ActiveEdges;
+  while( e )
+  {
+    if( e->OutIdx == ObsoleteIdx )
+    {
+      e->OutIdx = OKIdx;
+      e->Side = e1->Side;
+      break;
+    }
+    e = e->NextInAEL;
+  }
+
+  outRec2->Idx = outRec1->Idx;
+}
+//------------------------------------------------------------------------------
+
+OutPt* Clipper::AddOutPt(TEdge *e, const IntPoint &pt)
+{
+  if(  e->OutIdx < 0 )
+  {
+    OutRec *outRec = CreateOutRec();
+    outRec->IsOpen = (e->WindDelta == 0);
+    OutPt* newOp = new OutPt;
+    outRec->Pts = newOp;
+    newOp->Idx = outRec->Idx;
+    newOp->Pt = pt;
+    newOp->Next = newOp;
+    newOp->Prev = newOp;
+    if (!outRec->IsOpen)
+      SetHoleState(e, outRec);
+    e->OutIdx = outRec->Idx;
+    return newOp;
+  } else
+  {
+    OutRec *outRec = m_PolyOuts[e->OutIdx];
+    //OutRec.Pts is the 'Left-most' point & OutRec.Pts.Prev is the 'Right-most'
+    OutPt* op = outRec->Pts;
+
+	bool ToFront = (e->Side == esLeft);
+	if (ToFront && (pt == op->Pt)) return op;
+    else if (!ToFront && (pt == op->Prev->Pt)) return op->Prev;
+
+    OutPt* newOp = new OutPt;
+    newOp->Idx = outRec->Idx;
+    newOp->Pt = pt;
+    newOp->Next = op;
+    newOp->Prev = op->Prev;
+    newOp->Prev->Next = newOp;
+    op->Prev = newOp;
+    if (ToFront) outRec->Pts = newOp;
+    return newOp;
+  }
+}
+//------------------------------------------------------------------------------
+
+OutPt* Clipper::GetLastOutPt(TEdge *e)
+{
+	OutRec *outRec = m_PolyOuts[e->OutIdx];
+	if (e->Side == esLeft)
+		return outRec->Pts;
+	else
+		return outRec->Pts->Prev;
+}
+//------------------------------------------------------------------------------
+
+void Clipper::ProcessHorizontals()
+{
+  TEdge* horzEdge;
+  while (PopEdgeFromSEL(horzEdge))
+    ProcessHorizontal(horzEdge);
+}
+//------------------------------------------------------------------------------
+
+inline bool IsMinima(TEdge *e)
+{
+  return e  && (e->Prev->NextInLML != e) && (e->Next->NextInLML != e);
+}
+//------------------------------------------------------------------------------
+
+inline bool IsMaxima(TEdge *e, const cInt Y)
+{
+  return e && e->Top.Y == Y && !e->NextInLML;
+}
+//------------------------------------------------------------------------------
+
+inline bool IsIntermediate(TEdge *e, const cInt Y)
+{
+  return e->Top.Y == Y && e->NextInLML;
+}
+//------------------------------------------------------------------------------
+
+TEdge *GetMaximaPair(TEdge *e)
+{
+  if ((e->Next->Top == e->Top) && !e->Next->NextInLML)
+    return e->Next;
+  else if ((e->Prev->Top == e->Top) && !e->Prev->NextInLML)
+    return e->Prev;
+  else return 0;
+}
+//------------------------------------------------------------------------------
+
+TEdge *GetMaximaPairEx(TEdge *e)
+{
+  //as GetMaximaPair() but returns 0 if MaxPair isn't in AEL (unless it's horizontal)
+  TEdge* result = GetMaximaPair(e);
+  if (result && (result->OutIdx == Skip ||
+    (result->NextInAEL == result->PrevInAEL && !IsHorizontal(*result)))) return 0;
+  return result;
+}
+//------------------------------------------------------------------------------
+
+void Clipper::SwapPositionsInSEL(TEdge *Edge1, TEdge *Edge2)
+{
+  if(  !( Edge1->NextInSEL ) &&  !( Edge1->PrevInSEL ) ) return;
+  if(  !( Edge2->NextInSEL ) &&  !( Edge2->PrevInSEL ) ) return;
+
+  if(  Edge1->NextInSEL == Edge2 )
+  {
+    TEdge* Next = Edge2->NextInSEL;
+    if( Next ) Next->PrevInSEL = Edge1;
+    TEdge* Prev = Edge1->PrevInSEL;
+    if( Prev ) Prev->NextInSEL = Edge2;
+    Edge2->PrevInSEL = Prev;
+    Edge2->NextInSEL = Edge1;
+    Edge1->PrevInSEL = Edge2;
+    Edge1->NextInSEL = Next;
+  }
+  else if(  Edge2->NextInSEL == Edge1 )
+  {
+    TEdge* Next = Edge1->NextInSEL;
+    if( Next ) Next->PrevInSEL = Edge2;
+    TEdge* Prev = Edge2->PrevInSEL;
+    if( Prev ) Prev->NextInSEL = Edge1;
+    Edge1->PrevInSEL = Prev;
+    Edge1->NextInSEL = Edge2;
+    Edge2->PrevInSEL = Edge1;
+    Edge2->NextInSEL = Next;
+  }
+  else
+  {
+    TEdge* Next = Edge1->NextInSEL;
+    TEdge* Prev = Edge1->PrevInSEL;
+    Edge1->NextInSEL = Edge2->NextInSEL;
+    if( Edge1->NextInSEL ) Edge1->NextInSEL->PrevInSEL = Edge1;
+    Edge1->PrevInSEL = Edge2->PrevInSEL;
+    if( Edge1->PrevInSEL ) Edge1->PrevInSEL->NextInSEL = Edge1;
+    Edge2->NextInSEL = Next;
+    if( Edge2->NextInSEL ) Edge2->NextInSEL->PrevInSEL = Edge2;
+    Edge2->PrevInSEL = Prev;
+    if( Edge2->PrevInSEL ) Edge2->PrevInSEL->NextInSEL = Edge2;
+  }
+
+  if( !Edge1->PrevInSEL ) m_SortedEdges = Edge1;
+  else if( !Edge2->PrevInSEL ) m_SortedEdges = Edge2;
+}
+//------------------------------------------------------------------------------
+
+TEdge* GetNextInAEL(TEdge *e, Direction dir)
+{
+  return dir == dLeftToRight ? e->NextInAEL : e->PrevInAEL;
+}
+//------------------------------------------------------------------------------
+
+void GetHorzDirection(TEdge& HorzEdge, Direction& Dir, cInt& Left, cInt& Right)
+{
+  if (HorzEdge.Bot.X < HorzEdge.Top.X)
+  {
+    Left = HorzEdge.Bot.X;
+    Right = HorzEdge.Top.X;
+    Dir = dLeftToRight;
+  } else
+  {
+    Left = HorzEdge.Top.X;
+    Right = HorzEdge.Bot.X;
+    Dir = dRightToLeft;
+  }
+}
+//------------------------------------------------------------------------
+
+/*******************************************************************************
+* Notes: Horizontal edges (HEs) at scanline intersections (ie at the Top or    *
+* Bottom of a scanbeam) are processed as if layered. The order in which HEs    *
+* are processed doesn't matter. HEs intersect with other HE Bot.Xs only [#]    *
+* (or they could intersect with Top.Xs only, ie EITHER Bot.Xs OR Top.Xs),      *
+* and with other non-horizontal edges [*]. Once these intersections are        *
+* processed, intermediate HEs then 'promote' the Edge above (NextInLML) into   *
+* the AEL. These 'promoted' edges may in turn intersect [%] with other HEs.    *
+*******************************************************************************/
+
+void Clipper::ProcessHorizontal(TEdge *horzEdge)
+{
+  Direction dir;
+  cInt horzLeft, horzRight;
+  bool IsOpen = (horzEdge->WindDelta == 0);
+
+  GetHorzDirection(*horzEdge, dir, horzLeft, horzRight);
+
+  TEdge* eLastHorz = horzEdge, *eMaxPair = 0;
+  while (eLastHorz->NextInLML && IsHorizontal(*eLastHorz->NextInLML))
+    eLastHorz = eLastHorz->NextInLML;
+  if (!eLastHorz->NextInLML)
+    eMaxPair = GetMaximaPair(eLastHorz);
+
+  MaximaList::const_iterator maxIt;
+  MaximaList::const_reverse_iterator maxRit;
+  if (m_Maxima.size() > 0)
+  {
+      //get the first maxima in range (X) ...
+      if (dir == dLeftToRight)
+      {
+          maxIt = m_Maxima.begin();
+          while (maxIt != m_Maxima.end() && *maxIt <= horzEdge->Bot.X) maxIt++;
+          if (maxIt != m_Maxima.end() && *maxIt >= eLastHorz->Top.X)
+              maxIt = m_Maxima.end();
+      }
+      else
+      {
+          maxRit = m_Maxima.rbegin();
+          while (maxRit != m_Maxima.rend() && *maxRit > horzEdge->Bot.X) maxRit++;
+          if (maxRit != m_Maxima.rend() && *maxRit <= eLastHorz->Top.X)
+              maxRit = m_Maxima.rend();
+      }
+  }
+
+  OutPt* op1 = 0;
+
+  for (;;) //loop through consec. horizontal edges
+  {
+
+    bool IsLastHorz = (horzEdge == eLastHorz);
+    TEdge* e = GetNextInAEL(horzEdge, dir);
+    while(e)
+    {
+
+        //this code block inserts extra coords into horizontal edges (in output
+        //polygons) whereever maxima touch these horizontal edges. This helps
+        //'simplifying' polygons (ie if the Simplify property is set).
+        if (m_Maxima.size() > 0)
+        {
+            if (dir == dLeftToRight)
+            {
+                while (maxIt != m_Maxima.end() && *maxIt < e->Curr.X)
+                {
+                  if (horzEdge->OutIdx >= 0 && !IsOpen)
+                    AddOutPt(horzEdge, IntPoint(*maxIt, horzEdge->Bot.Y));
+                  maxIt++;
+                }
+            }
+            else
+            {
+                while (maxRit != m_Maxima.rend() && *maxRit > e->Curr.X)
+                {
+                  if (horzEdge->OutIdx >= 0 && !IsOpen)
+                    AddOutPt(horzEdge, IntPoint(*maxRit, horzEdge->Bot.Y));
+                  maxRit++;
+                }
+            }
+        };
+
+        if ((dir == dLeftToRight && e->Curr.X > horzRight) ||
+			(dir == dRightToLeft && e->Curr.X < horzLeft)) break;
+
+		//Also break if we've got to the end of an intermediate horizontal edge ...
+		//nb: Smaller Dx's are to the right of larger Dx's ABOVE the horizontal.
+		if (e->Curr.X == horzEdge->Top.X && horzEdge->NextInLML &&
+			e->Dx < horzEdge->NextInLML->Dx) break;
+
+    if (horzEdge->OutIdx >= 0 && !IsOpen)  //note: may be done multiple times
+		{
+#ifdef use_xyz
+			if (dir == dLeftToRight) SetZ(e->Curr, *horzEdge, *e);
+			else SetZ(e->Curr, *e, *horzEdge);
+#endif
+			op1 = AddOutPt(horzEdge, e->Curr);
+			TEdge* eNextHorz = m_SortedEdges;
+			while (eNextHorz)
+			{
+				if (eNextHorz->OutIdx >= 0 &&
+					HorzSegmentsOverlap(horzEdge->Bot.X,
+					horzEdge->Top.X, eNextHorz->Bot.X, eNextHorz->Top.X))
+				{
+                    OutPt* op2 = GetLastOutPt(eNextHorz);
+                    AddJoin(op2, op1, eNextHorz->Top);
+				}
+				eNextHorz = eNextHorz->NextInSEL;
+			}
+			AddGhostJoin(op1, horzEdge->Bot);
+		}
+
+		//OK, so far we're still in range of the horizontal Edge  but make sure
+        //we're at the last of consec. horizontals when matching with eMaxPair
+        if(e == eMaxPair && IsLastHorz)
+        {
+          if (horzEdge->OutIdx >= 0)
+            AddLocalMaxPoly(horzEdge, eMaxPair, horzEdge->Top);
+          DeleteFromAEL(horzEdge);
+          DeleteFromAEL(eMaxPair);
+          return;
+        }
+
+		if(dir == dLeftToRight)
+        {
+          IntPoint Pt = IntPoint(e->Curr.X, horzEdge->Curr.Y);
+          IntersectEdges(horzEdge, e, Pt);
+        }
+        else
+        {
+          IntPoint Pt = IntPoint(e->Curr.X, horzEdge->Curr.Y);
+          IntersectEdges( e, horzEdge, Pt);
+        }
+        TEdge* eNext = GetNextInAEL(e, dir);
+        SwapPositionsInAEL( horzEdge, e );
+        e = eNext;
+    } //end while(e)
+
+	//Break out of loop if HorzEdge.NextInLML is not also horizontal ...
+	if (!horzEdge->NextInLML || !IsHorizontal(*horzEdge->NextInLML)) break;
+
+	UpdateEdgeIntoAEL(horzEdge);
+    if (horzEdge->OutIdx >= 0) AddOutPt(horzEdge, horzEdge->Bot);
+    GetHorzDirection(*horzEdge, dir, horzLeft, horzRight);
+
+  } //end for (;;)
+
+  if (horzEdge->OutIdx >= 0 && !op1)
+  {
+      op1 = GetLastOutPt(horzEdge);
+      TEdge* eNextHorz = m_SortedEdges;
+      while (eNextHorz)
+      {
+          if (eNextHorz->OutIdx >= 0 &&
+              HorzSegmentsOverlap(horzEdge->Bot.X,
+              horzEdge->Top.X, eNextHorz->Bot.X, eNextHorz->Top.X))
+          {
+              OutPt* op2 = GetLastOutPt(eNextHorz);
+              AddJoin(op2, op1, eNextHorz->Top);
+          }
+          eNextHorz = eNextHorz->NextInSEL;
+      }
+      AddGhostJoin(op1, horzEdge->Top);
+  }
+
+  if (horzEdge->NextInLML)
+  {
+    if(horzEdge->OutIdx >= 0)
+    {
+      op1 = AddOutPt( horzEdge, horzEdge->Top);
+      UpdateEdgeIntoAEL(horzEdge);
+      if (horzEdge->WindDelta == 0) return;
+      //nb: HorzEdge is no longer horizontal here
+      TEdge* ePrev = horzEdge->PrevInAEL;
+      TEdge* eNext = horzEdge->NextInAEL;
+      if (ePrev && ePrev->Curr.X == horzEdge->Bot.X &&
+        ePrev->Curr.Y == horzEdge->Bot.Y && ePrev->WindDelta != 0 &&
+        (ePrev->OutIdx >= 0 && ePrev->Curr.Y > ePrev->Top.Y &&
+        SlopesEqual(*horzEdge, *ePrev, m_UseFullRange)))
+      {
+        OutPt* op2 = AddOutPt(ePrev, horzEdge->Bot);
+        AddJoin(op1, op2, horzEdge->Top);
+      }
+      else if (eNext && eNext->Curr.X == horzEdge->Bot.X &&
+        eNext->Curr.Y == horzEdge->Bot.Y && eNext->WindDelta != 0 &&
+        eNext->OutIdx >= 0 && eNext->Curr.Y > eNext->Top.Y &&
+        SlopesEqual(*horzEdge, *eNext, m_UseFullRange))
+      {
+        OutPt* op2 = AddOutPt(eNext, horzEdge->Bot);
+        AddJoin(op1, op2, horzEdge->Top);
+      }
+    }
+    else
+      UpdateEdgeIntoAEL(horzEdge);
+  }
+  else
+  {
+    if (horzEdge->OutIdx >= 0) AddOutPt(horzEdge, horzEdge->Top);
+    DeleteFromAEL(horzEdge);
+  }
+}
+//------------------------------------------------------------------------------
+
+bool Clipper::ProcessIntersections(const cInt topY)
+{
+  if( !m_ActiveEdges ) return true;
+  try {
+    BuildIntersectList(topY);
+    size_t IlSize = m_IntersectList.size();
+    if (IlSize == 0) return true;
+    if (IlSize == 1 || FixupIntersectionOrder()) ProcessIntersectList();
+    else return false;
+  }
+  catch(...)
+  {
+    m_SortedEdges = 0;
+    DisposeIntersectNodes();
+    throw clipperException("ProcessIntersections error");
+  }
+  m_SortedEdges = 0;
+  return true;
+}
+//------------------------------------------------------------------------------
+
+void Clipper::DisposeIntersectNodes()
+{
+  for (size_t i = 0; i < m_IntersectList.size(); ++i )
+    delete m_IntersectList[i];
+  m_IntersectList.clear();
+}
+//------------------------------------------------------------------------------
+
+void Clipper::BuildIntersectList(const cInt topY)
+{
+  if ( !m_ActiveEdges ) return;
+
+  //prepare for sorting ...
+  TEdge* e = m_ActiveEdges;
+  m_SortedEdges = e;
+  while( e )
+  {
+    e->PrevInSEL = e->PrevInAEL;
+    e->NextInSEL = e->NextInAEL;
+    e->Curr.X = TopX( *e, topY );
+    e = e->NextInAEL;
+  }
+
+  //bubblesort ...
+  bool isModified;
+  do
+  {
+    isModified = false;
+    e = m_SortedEdges;
+    while( e->NextInSEL )
+    {
+      TEdge *eNext = e->NextInSEL;
+      IntPoint Pt;
+      if(e->Curr.X > eNext->Curr.X)
+      {
+        IntersectPoint(*e, *eNext, Pt);
+        if (Pt.Y < topY) Pt = IntPoint(TopX(*e, topY), topY);
+        IntersectNode * newNode = new IntersectNode;
+        newNode->Edge1 = e;
+        newNode->Edge2 = eNext;
+        newNode->Pt = Pt;
+        m_IntersectList.push_back(newNode);
+
+        SwapPositionsInSEL(e, eNext);
+        isModified = true;
+      }
+      else
+        e = eNext;
+    }
+    if( e->PrevInSEL ) e->PrevInSEL->NextInSEL = 0;
+    else break;
+  }
+  while ( isModified );
+  m_SortedEdges = 0; //important
+}
+//------------------------------------------------------------------------------
+
+
+void Clipper::ProcessIntersectList()
+{
+  for (size_t i = 0; i < m_IntersectList.size(); ++i)
+  {
+    IntersectNode* iNode = m_IntersectList[i];
+    {
+      IntersectEdges( iNode->Edge1, iNode->Edge2, iNode->Pt);
+      SwapPositionsInAEL( iNode->Edge1 , iNode->Edge2 );
+    }
+    delete iNode;
+  }
+  m_IntersectList.clear();
+}
+//------------------------------------------------------------------------------
+
+bool IntersectListSort(IntersectNode* node1, IntersectNode* node2)
+{
+  return node2->Pt.Y < node1->Pt.Y;
+}
+//------------------------------------------------------------------------------
+
+inline bool EdgesAdjacent(const IntersectNode &inode)
+{
+  return (inode.Edge1->NextInSEL == inode.Edge2) ||
+    (inode.Edge1->PrevInSEL == inode.Edge2);
+}
+//------------------------------------------------------------------------------
+
+bool Clipper::FixupIntersectionOrder()
+{
+  //pre-condition: intersections are sorted Bottom-most first.
+  //Now it's crucial that intersections are made only between adjacent edges,
+  //so to ensure this the order of intersections may need adjusting ...
+  CopyAELToSEL();
+  std::sort(m_IntersectList.begin(), m_IntersectList.end(), IntersectListSort);
+  size_t cnt = m_IntersectList.size();
+  for (size_t i = 0; i < cnt; ++i)
+  {
+    if (!EdgesAdjacent(*m_IntersectList[i]))
+    {
+      size_t j = i + 1;
+      while (j < cnt && !EdgesAdjacent(*m_IntersectList[j])) j++;
+      if (j == cnt)  return false;
+      std::swap(m_IntersectList[i], m_IntersectList[j]);
+    }
+    SwapPositionsInSEL(m_IntersectList[i]->Edge1, m_IntersectList[i]->Edge2);
+  }
+  return true;
+}
+//------------------------------------------------------------------------------
+
+void Clipper::DoMaxima(TEdge *e)
+{
+  TEdge* eMaxPair = GetMaximaPairEx(e);
+  if (!eMaxPair)
+  {
+    if (e->OutIdx >= 0)
+      AddOutPt(e, e->Top);
+    DeleteFromAEL(e);
+    return;
+  }
+
+  TEdge* eNext = e->NextInAEL;
+  while(eNext && eNext != eMaxPair)
+  {
+    IntersectEdges(e, eNext, e->Top);
+    SwapPositionsInAEL(e, eNext);
+    eNext = e->NextInAEL;
+  }
+
+  if(e->OutIdx == Unassigned && eMaxPair->OutIdx == Unassigned)
+  {
+    DeleteFromAEL(e);
+    DeleteFromAEL(eMaxPair);
+  }
+  else if( e->OutIdx >= 0 && eMaxPair->OutIdx >= 0 )
+  {
+    if (e->OutIdx >= 0) AddLocalMaxPoly(e, eMaxPair, e->Top);
+    DeleteFromAEL(e);
+    DeleteFromAEL(eMaxPair);
+  }
+#ifdef use_lines
+  else if (e->WindDelta == 0)
+  {
+    if (e->OutIdx >= 0)
+    {
+      AddOutPt(e, e->Top);
+      e->OutIdx = Unassigned;
+    }
+    DeleteFromAEL(e);
+
+    if (eMaxPair->OutIdx >= 0)
+    {
+      AddOutPt(eMaxPair, e->Top);
+      eMaxPair->OutIdx = Unassigned;
+    }
+    DeleteFromAEL(eMaxPair);
+  }
+#endif
+  else throw clipperException("DoMaxima error");
+}
+//------------------------------------------------------------------------------
+
+void Clipper::ProcessEdgesAtTopOfScanbeam(const cInt topY)
+{
+  TEdge* e = m_ActiveEdges;
+  while( e )
+  {
+    //1. process maxima, treating them as if they're 'bent' horizontal edges,
+    //   but exclude maxima with horizontal edges. nb: e can't be a horizontal.
+    bool IsMaximaEdge = IsMaxima(e, topY);
+
+    if(IsMaximaEdge)
+    {
+      TEdge* eMaxPair = GetMaximaPairEx(e);
+      IsMaximaEdge = (!eMaxPair || !IsHorizontal(*eMaxPair));
+    }
+
+    if(IsMaximaEdge)
+    {
+      if (m_StrictSimple) m_Maxima.push_back(e->Top.X);
+      TEdge* ePrev = e->PrevInAEL;
+      DoMaxima(e);
+      if( !ePrev ) e = m_ActiveEdges;
+      else e = ePrev->NextInAEL;
+    }
+    else
+    {
+      //2. promote horizontal edges, otherwise update Curr.X and Curr.Y ...
+      if (IsIntermediate(e, topY) && IsHorizontal(*e->NextInLML))
+      {
+        UpdateEdgeIntoAEL(e);
+        if (e->OutIdx >= 0)
+          AddOutPt(e, e->Bot);
+        AddEdgeToSEL(e);
+      }
+      else
+      {
+        e->Curr.X = TopX( *e, topY );
+        e->Curr.Y = topY;
+#ifdef use_xyz
+		e->Curr.Z = topY == e->Top.Y ? e->Top.Z : (topY == e->Bot.Y ? e->Bot.Z : 0);
+#endif
+	  }
+
+      //When StrictlySimple and 'e' is being touched by another edge, then
+      //make sure both edges have a vertex here ...
+      if (m_StrictSimple)
+      {
+        TEdge* ePrev = e->PrevInAEL;
+        if ((e->OutIdx >= 0) && (e->WindDelta != 0) && ePrev && (ePrev->OutIdx >= 0) &&
+          (ePrev->Curr.X == e->Curr.X) && (ePrev->WindDelta != 0))
+        {
+          IntPoint pt = e->Curr;
+#ifdef use_xyz
+          SetZ(pt, *ePrev, *e);
+#endif
+          OutPt* op = AddOutPt(ePrev, pt);
+          OutPt* op2 = AddOutPt(e, pt);
+          AddJoin(op, op2, pt); //StrictlySimple (type-3) join
+        }
+      }
+
+      e = e->NextInAEL;
+    }
+  }
+
+  //3. Process horizontals at the Top of the scanbeam ...
+  m_Maxima.sort();
+  ProcessHorizontals();
+  m_Maxima.clear();
+
+  //4. Promote intermediate vertices ...
+  e = m_ActiveEdges;
+  while(e)
+  {
+    if(IsIntermediate(e, topY))
+    {
+      OutPt* op = 0;
+      if( e->OutIdx >= 0 )
+        op = AddOutPt(e, e->Top);
+      UpdateEdgeIntoAEL(e);
+
+      //if output polygons share an edge, they'll need joining later ...
+      TEdge* ePrev = e->PrevInAEL;
+      TEdge* eNext = e->NextInAEL;
+      if (ePrev && ePrev->Curr.X == e->Bot.X &&
+        ePrev->Curr.Y == e->Bot.Y && op &&
+        ePrev->OutIdx >= 0 && ePrev->Curr.Y > ePrev->Top.Y &&
+        SlopesEqual(e->Curr, e->Top, ePrev->Curr, ePrev->Top, m_UseFullRange) &&
+        (e->WindDelta != 0) && (ePrev->WindDelta != 0))
+      {
+        OutPt* op2 = AddOutPt(ePrev, e->Bot);
+        AddJoin(op, op2, e->Top);
+      }
+      else if (eNext && eNext->Curr.X == e->Bot.X &&
+        eNext->Curr.Y == e->Bot.Y && op &&
+        eNext->OutIdx >= 0 && eNext->Curr.Y > eNext->Top.Y &&
+        SlopesEqual(e->Curr, e->Top, eNext->Curr, eNext->Top, m_UseFullRange) &&
+        (e->WindDelta != 0) && (eNext->WindDelta != 0))
+      {
+        OutPt* op2 = AddOutPt(eNext, e->Bot);
+        AddJoin(op, op2, e->Top);
+      }
+    }
+    e = e->NextInAEL;
+  }
+}
+//------------------------------------------------------------------------------
+
+void Clipper::FixupOutPolyline(OutRec &outrec)
+{
+  OutPt *pp = outrec.Pts;
+  OutPt *lastPP = pp->Prev;
+  while (pp != lastPP)
+  {
+    pp = pp->Next;
+    if (pp->Pt == pp->Prev->Pt)
+    {
+      if (pp == lastPP) lastPP = pp->Prev;
+      OutPt *tmpPP = pp->Prev;
+      tmpPP->Next = pp->Next;
+      pp->Next->Prev = tmpPP;
+      delete pp;
+      pp = tmpPP;
+    }
+  }
+
+  if (pp == pp->Prev)
+  {
+    DisposeOutPts(pp);
+    outrec.Pts = 0;
+    return;
+  }
+}
+//------------------------------------------------------------------------------
+
+void Clipper::FixupOutPolygon(OutRec &outrec)
+{
+    //FixupOutPolygon() - removes duplicate points and simplifies consecutive
+    //parallel edges by removing the middle vertex.
+    OutPt *lastOK = 0;
+    outrec.BottomPt = 0;
+    OutPt *pp = outrec.Pts;
+    bool preserveCol = m_PreserveCollinear || m_StrictSimple;
+
+    for (;;)
+    {
+        if (pp->Prev == pp || pp->Prev == pp->Next)
+        {
+            DisposeOutPts(pp);
+            outrec.Pts = 0;
+            return;
+        }
+
+        //test for duplicate points and collinear edges ...
+        if ((pp->Pt == pp->Next->Pt) || (pp->Pt == pp->Prev->Pt) ||
+            (SlopesEqual(pp->Prev->Pt, pp->Pt, pp->Next->Pt, m_UseFullRange) &&
+            (!preserveCol || !Pt2IsBetweenPt1AndPt3(pp->Prev->Pt, pp->Pt, pp->Next->Pt))))
+        {
+            lastOK = 0;
+            OutPt *tmp = pp;
+            pp->Prev->Next = pp->Next;
+            pp->Next->Prev = pp->Prev;
+            pp = pp->Prev;
+            delete tmp;
+        }
+        else if (pp == lastOK) break;
+        else
+        {
+            if (!lastOK) lastOK = pp;
+            pp = pp->Next;
+        }
+    }
+    outrec.Pts = pp;
+}
+//------------------------------------------------------------------------------
+
+int PointCount(OutPt *Pts)
+{
+    if (!Pts) return 0;
+    int result = 0;
+    OutPt* p = Pts;
+    do
+    {
+        result++;
+        p = p->Next;
+    }
+    while (p != Pts);
+    return result;
+}
+//------------------------------------------------------------------------------
+
+void Clipper::BuildResult(Paths &polys)
+{
+  polys.reserve(m_PolyOuts.size());
+  for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); ++i)
+  {
+    if (!m_PolyOuts[i]->Pts) continue;
+    Path pg;
+    OutPt* p = m_PolyOuts[i]->Pts->Prev;
+    int cnt = PointCount(p);
+    if (cnt < 2) continue;
+    pg.reserve(cnt);
+    for (int i = 0; i < cnt; ++i)
+    {
+      pg.push_back(p->Pt);
+      p = p->Prev;
+    }
+    polys.push_back(pg);
+  }
+}
+//------------------------------------------------------------------------------
+
+void Clipper::BuildResult2(PolyTree& polytree)
+{
+    polytree.Clear();
+    polytree.AllNodes.reserve(m_PolyOuts.size());
+    //add each output polygon/contour to polytree ...
+    for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); i++)
+    {
+        OutRec* outRec = m_PolyOuts[i];
+        int cnt = PointCount(outRec->Pts);
+        if ((outRec->IsOpen && cnt < 2) || (!outRec->IsOpen && cnt < 3)) continue;
+        FixHoleLinkage(*outRec);
+        PolyNode* pn = new PolyNode();
+        //nb: polytree takes ownership of all the PolyNodes
+        polytree.AllNodes.push_back(pn);
+        outRec->PolyNd = pn;
+        pn->Parent = 0;
+        pn->Index = 0;
+        pn->Contour.reserve(cnt);
+        OutPt *op = outRec->Pts->Prev;
+        for (int j = 0; j < cnt; j++)
+        {
+            pn->Contour.push_back(op->Pt);
+            op = op->Prev;
+        }
+    }
+
+    //fixup PolyNode links etc ...
+    polytree.Childs.reserve(m_PolyOuts.size());
+    for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); i++)
+    {
+        OutRec* outRec = m_PolyOuts[i];
+        if (!outRec->PolyNd) continue;
+        if (outRec->IsOpen)
+        {
+          outRec->PolyNd->m_IsOpen = true;
+          polytree.AddChild(*outRec->PolyNd);
+        }
+        else if (outRec->FirstLeft && outRec->FirstLeft->PolyNd)
+          outRec->FirstLeft->PolyNd->AddChild(*outRec->PolyNd);
+        else
+          polytree.AddChild(*outRec->PolyNd);
+    }
+}
+//------------------------------------------------------------------------------
+
+void SwapIntersectNodes(IntersectNode &int1, IntersectNode &int2)
+{
+  //just swap the contents (because fIntersectNodes is a single-linked-list)
+  IntersectNode inode = int1; //gets a copy of Int1
+  int1.Edge1 = int2.Edge1;
+  int1.Edge2 = int2.Edge2;
+  int1.Pt = int2.Pt;
+  int2.Edge1 = inode.Edge1;
+  int2.Edge2 = inode.Edge2;
+  int2.Pt = inode.Pt;
+}
+//------------------------------------------------------------------------------
+
+inline bool E2InsertsBeforeE1(TEdge &e1, TEdge &e2)
+{
+  if (e2.Curr.X == e1.Curr.X)
+  {
+    if (e2.Top.Y > e1.Top.Y)
+      return e2.Top.X < TopX(e1, e2.Top.Y);
+      else return e1.Top.X > TopX(e2, e1.Top.Y);
+  }
+  else return e2.Curr.X < e1.Curr.X;
+}
+//------------------------------------------------------------------------------
+
+bool GetOverlap(const cInt a1, const cInt a2, const cInt b1, const cInt b2,
+    cInt& Left, cInt& Right)
+{
+  if (a1 < a2)
+  {
+    if (b1 < b2) {Left = std::max(a1,b1); Right = std::min(a2,b2);}
+    else {Left = std::max(a1,b2); Right = std::min(a2,b1);}
+  }
+  else
+  {
+    if (b1 < b2) {Left = std::max(a2,b1); Right = std::min(a1,b2);}
+    else {Left = std::max(a2,b2); Right = std::min(a1,b1);}
+  }
+  return Left < Right;
+}
+//------------------------------------------------------------------------------
+
+inline void UpdateOutPtIdxs(OutRec& outrec)
+{
+  OutPt* op = outrec.Pts;
+  do
+  {
+    op->Idx = outrec.Idx;
+    op = op->Prev;
+  }
+  while(op != outrec.Pts);
+}
+//------------------------------------------------------------------------------
+
+void Clipper::InsertEdgeIntoAEL(TEdge *edge, TEdge* startEdge)
+{
+  if(!m_ActiveEdges)
+  {
+    edge->PrevInAEL = 0;
+    edge->NextInAEL = 0;
+    m_ActiveEdges = edge;
+  }
+  else if(!startEdge && E2InsertsBeforeE1(*m_ActiveEdges, *edge))
+  {
+      edge->PrevInAEL = 0;
+      edge->NextInAEL = m_ActiveEdges;
+      m_ActiveEdges->PrevInAEL = edge;
+      m_ActiveEdges = edge;
+  }
+  else
+  {
+    if(!startEdge) startEdge = m_ActiveEdges;
+    while(startEdge->NextInAEL  &&
+      !E2InsertsBeforeE1(*startEdge->NextInAEL , *edge))
+        startEdge = startEdge->NextInAEL;
+    edge->NextInAEL = startEdge->NextInAEL;
+    if(startEdge->NextInAEL) startEdge->NextInAEL->PrevInAEL = edge;
+    edge->PrevInAEL = startEdge;
+    startEdge->NextInAEL = edge;
+  }
+}
+//----------------------------------------------------------------------
+
+OutPt* DupOutPt(OutPt* outPt, bool InsertAfter)
+{
+  OutPt* result = new OutPt;
+  result->Pt = outPt->Pt;
+  result->Idx = outPt->Idx;
+  if (InsertAfter)
+  {
+    result->Next = outPt->Next;
+    result->Prev = outPt;
+    outPt->Next->Prev = result;
+    outPt->Next = result;
+  }
+  else
+  {
+    result->Prev = outPt->Prev;
+    result->Next = outPt;
+    outPt->Prev->Next = result;
+    outPt->Prev = result;
+  }
+  return result;
+}
+//------------------------------------------------------------------------------
+
+bool JoinHorz(OutPt* op1, OutPt* op1b, OutPt* op2, OutPt* op2b,
+  const IntPoint Pt, bool DiscardLeft)
+{
+  Direction Dir1 = (op1->Pt.X > op1b->Pt.X ? dRightToLeft : dLeftToRight);
+  Direction Dir2 = (op2->Pt.X > op2b->Pt.X ? dRightToLeft : dLeftToRight);
+  if (Dir1 == Dir2) return false;
+
+  //When DiscardLeft, we want Op1b to be on the Left of Op1, otherwise we
+  //want Op1b to be on the Right. (And likewise with Op2 and Op2b.)
+  //So, to facilitate this while inserting Op1b and Op2b ...
+  //when DiscardLeft, make sure we're AT or RIGHT of Pt before adding Op1b,
+  //otherwise make sure we're AT or LEFT of Pt. (Likewise with Op2b.)
+  if (Dir1 == dLeftToRight)
+  {
+    while (op1->Next->Pt.X <= Pt.X &&
+      op1->Next->Pt.X >= op1->Pt.X && op1->Next->Pt.Y == Pt.Y)
+        op1 = op1->Next;
+    if (DiscardLeft && (op1->Pt.X != Pt.X)) op1 = op1->Next;
+    op1b = DupOutPt(op1, !DiscardLeft);
+    if (op1b->Pt != Pt)
+    {
+      op1 = op1b;
+      op1->Pt = Pt;
+      op1b = DupOutPt(op1, !DiscardLeft);
+    }
+  }
+  else
+  {
+    while (op1->Next->Pt.X >= Pt.X &&
+      op1->Next->Pt.X <= op1->Pt.X && op1->Next->Pt.Y == Pt.Y)
+        op1 = op1->Next;
+    if (!DiscardLeft && (op1->Pt.X != Pt.X)) op1 = op1->Next;
+    op1b = DupOutPt(op1, DiscardLeft);
+    if (op1b->Pt != Pt)
+    {
+      op1 = op1b;
+      op1->Pt = Pt;
+      op1b = DupOutPt(op1, DiscardLeft);
+    }
+  }
+
+  if (Dir2 == dLeftToRight)
+  {
+    while (op2->Next->Pt.X <= Pt.X &&
+      op2->Next->Pt.X >= op2->Pt.X && op2->Next->Pt.Y == Pt.Y)
+        op2 = op2->Next;
+    if (DiscardLeft && (op2->Pt.X != Pt.X)) op2 = op2->Next;
+    op2b = DupOutPt(op2, !DiscardLeft);
+    if (op2b->Pt != Pt)
+    {
+      op2 = op2b;
+      op2->Pt = Pt;
+      op2b = DupOutPt(op2, !DiscardLeft);
+    };
+  } else
+  {
+    while (op2->Next->Pt.X >= Pt.X &&
+      op2->Next->Pt.X <= op2->Pt.X && op2->Next->Pt.Y == Pt.Y)
+        op2 = op2->Next;
+    if (!DiscardLeft && (op2->Pt.X != Pt.X)) op2 = op2->Next;
+    op2b = DupOutPt(op2, DiscardLeft);
+    if (op2b->Pt != Pt)
+    {
+      op2 = op2b;
+      op2->Pt = Pt;
+      op2b = DupOutPt(op2, DiscardLeft);
+    };
+  };
+
+  if ((Dir1 == dLeftToRight) == DiscardLeft)
+  {
+    op1->Prev = op2;
+    op2->Next = op1;
+    op1b->Next = op2b;
+    op2b->Prev = op1b;
+  }
+  else
+  {
+    op1->Next = op2;
+    op2->Prev = op1;
+    op1b->Prev = op2b;
+    op2b->Next = op1b;
+  }
+  return true;
+}
+//------------------------------------------------------------------------------
+
+bool Clipper::JoinPoints(Join *j, OutRec* outRec1, OutRec* outRec2)
+{
+  OutPt *op1 = j->OutPt1, *op1b;
+  OutPt *op2 = j->OutPt2, *op2b;
+
+  //There are 3 kinds of joins for output polygons ...
+  //1. Horizontal joins where Join.OutPt1 & Join.OutPt2 are vertices anywhere
+  //along (horizontal) collinear edges (& Join.OffPt is on the same horizontal).
+  //2. Non-horizontal joins where Join.OutPt1 & Join.OutPt2 are at the same
+  //location at the Bottom of the overlapping segment (& Join.OffPt is above).
+  //3. StrictSimple joins where edges touch but are not collinear and where
+  //Join.OutPt1, Join.OutPt2 & Join.OffPt all share the same point.
+  bool isHorizontal = (j->OutPt1->Pt.Y == j->OffPt.Y);
+
+  if (isHorizontal  && (j->OffPt == j->OutPt1->Pt) &&
+  (j->OffPt == j->OutPt2->Pt))
+  {
+    //Strictly Simple join ...
+    if (outRec1 != outRec2) return false;
+    op1b = j->OutPt1->Next;
+    while (op1b != op1 && (op1b->Pt == j->OffPt))
+      op1b = op1b->Next;
+    bool reverse1 = (op1b->Pt.Y > j->OffPt.Y);
+    op2b = j->OutPt2->Next;
+    while (op2b != op2 && (op2b->Pt == j->OffPt))
+      op2b = op2b->Next;
+    bool reverse2 = (op2b->Pt.Y > j->OffPt.Y);
+    if (reverse1 == reverse2) return false;
+    if (reverse1)
+    {
+      op1b = DupOutPt(op1, false);
+      op2b = DupOutPt(op2, true);
+      op1->Prev = op2;
+      op2->Next = op1;
+      op1b->Next = op2b;
+      op2b->Prev = op1b;
+      j->OutPt1 = op1;
+      j->OutPt2 = op1b;
+      return true;
+    } else
+    {
+      op1b = DupOutPt(op1, true);
+      op2b = DupOutPt(op2, false);
+      op1->Next = op2;
+      op2->Prev = op1;
+      op1b->Prev = op2b;
+      op2b->Next = op1b;
+      j->OutPt1 = op1;
+      j->OutPt2 = op1b;
+      return true;
+    }
+  }
+  else if (isHorizontal)
+  {
+    //treat horizontal joins differently to non-horizontal joins since with
+    //them we're not yet sure where the overlapping is. OutPt1.Pt & OutPt2.Pt
+    //may be anywhere along the horizontal edge.
+    op1b = op1;
+    while (op1->Prev->Pt.Y == op1->Pt.Y && op1->Prev != op1b && op1->Prev != op2)
+      op1 = op1->Prev;
+    while (op1b->Next->Pt.Y == op1b->Pt.Y && op1b->Next != op1 && op1b->Next != op2)
+      op1b = op1b->Next;
+    if (op1b->Next == op1 || op1b->Next == op2) return false; //a flat 'polygon'
+
+    op2b = op2;
+    while (op2->Prev->Pt.Y == op2->Pt.Y && op2->Prev != op2b && op2->Prev != op1b)
+      op2 = op2->Prev;
+    while (op2b->Next->Pt.Y == op2b->Pt.Y && op2b->Next != op2 && op2b->Next != op1)
+      op2b = op2b->Next;
+    if (op2b->Next == op2 || op2b->Next == op1) return false; //a flat 'polygon'
+
+    cInt Left, Right;
+    //Op1 --> Op1b & Op2 --> Op2b are the extremites of the horizontal edges
+    if (!GetOverlap(op1->Pt.X, op1b->Pt.X, op2->Pt.X, op2b->Pt.X, Left, Right))
+      return false;
+
+    //DiscardLeftSide: when overlapping edges are joined, a spike will created
+    //which needs to be cleaned up. However, we don't want Op1 or Op2 caught up
+    //on the discard Side as either may still be needed for other joins ...
+    IntPoint Pt;
+    bool DiscardLeftSide;
+    if (op1->Pt.X >= Left && op1->Pt.X <= Right)
+    {
+      Pt = op1->Pt; DiscardLeftSide = (op1->Pt.X > op1b->Pt.X);
+    }
+    else if (op2->Pt.X >= Left&& op2->Pt.X <= Right)
+    {
+      Pt = op2->Pt; DiscardLeftSide = (op2->Pt.X > op2b->Pt.X);
+    }
+    else if (op1b->Pt.X >= Left && op1b->Pt.X <= Right)
+    {
+      Pt = op1b->Pt; DiscardLeftSide = op1b->Pt.X > op1->Pt.X;
+    }
+    else
+    {
+      Pt = op2b->Pt; DiscardLeftSide = (op2b->Pt.X > op2->Pt.X);
+    }
+    j->OutPt1 = op1; j->OutPt2 = op2;
+    return JoinHorz(op1, op1b, op2, op2b, Pt, DiscardLeftSide);
+  } else
+  {
+    //nb: For non-horizontal joins ...
+    //    1. Jr.OutPt1.Pt.Y == Jr.OutPt2.Pt.Y
+    //    2. Jr.OutPt1.Pt > Jr.OffPt.Y
+
+    //make sure the polygons are correctly oriented ...
+    op1b = op1->Next;
+    while ((op1b->Pt == op1->Pt) && (op1b != op1)) op1b = op1b->Next;
+    bool Reverse1 = ((op1b->Pt.Y > op1->Pt.Y) ||
+      !SlopesEqual(op1->Pt, op1b->Pt, j->OffPt, m_UseFullRange));
+    if (Reverse1)
+    {
+      op1b = op1->Prev;
+      while ((op1b->Pt == op1->Pt) && (op1b != op1)) op1b = op1b->Prev;
+      if ((op1b->Pt.Y > op1->Pt.Y) ||
+        !SlopesEqual(op1->Pt, op1b->Pt, j->OffPt, m_UseFullRange)) return false;
+    };
+    op2b = op2->Next;
+    while ((op2b->Pt == op2->Pt) && (op2b != op2))op2b = op2b->Next;
+    bool Reverse2 = ((op2b->Pt.Y > op2->Pt.Y) ||
+      !SlopesEqual(op2->Pt, op2b->Pt, j->OffPt, m_UseFullRange));
+    if (Reverse2)
+    {
+      op2b = op2->Prev;
+      while ((op2b->Pt == op2->Pt) && (op2b != op2)) op2b = op2b->Prev;
+      if ((op2b->Pt.Y > op2->Pt.Y) ||
+        !SlopesEqual(op2->Pt, op2b->Pt, j->OffPt, m_UseFullRange)) return false;
+    }
+
+    if ((op1b == op1) || (op2b == op2) || (op1b == op2b) ||
+      ((outRec1 == outRec2) && (Reverse1 == Reverse2))) return false;
+
+    if (Reverse1)
+    {
+      op1b = DupOutPt(op1, false);
+      op2b = DupOutPt(op2, true);
+      op1->Prev = op2;
+      op2->Next = op1;
+      op1b->Next = op2b;
+      op2b->Prev = op1b;
+      j->OutPt1 = op1;
+      j->OutPt2 = op1b;
+      return true;
+    } else
+    {
+      op1b = DupOutPt(op1, true);
+      op2b = DupOutPt(op2, false);
+      op1->Next = op2;
+      op2->Prev = op1;
+      op1b->Prev = op2b;
+      op2b->Next = op1b;
+      j->OutPt1 = op1;
+      j->OutPt2 = op1b;
+      return true;
+    }
+  }
+}
+//----------------------------------------------------------------------
+
+static OutRec* ParseFirstLeft(OutRec* FirstLeft)
+{
+  while (FirstLeft && !FirstLeft->Pts)
+    FirstLeft = FirstLeft->FirstLeft;
+  return FirstLeft;
+}
+//------------------------------------------------------------------------------
+
+void Clipper::FixupFirstLefts1(OutRec* OldOutRec, OutRec* NewOutRec)
+{
+  //tests if NewOutRec contains the polygon before reassigning FirstLeft
+  for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); ++i)
+  {
+    OutRec* outRec = m_PolyOuts[i];
+    OutRec* firstLeft = ParseFirstLeft(outRec->FirstLeft);
+    if (outRec->Pts  && firstLeft == OldOutRec)
+    {
+      if (Poly2ContainsPoly1(outRec->Pts, NewOutRec->Pts))
+        outRec->FirstLeft = NewOutRec;
+    }
+  }
+}
+//----------------------------------------------------------------------
+
+void Clipper::FixupFirstLefts2(OutRec* InnerOutRec, OutRec* OuterOutRec)
+{
+  //A polygon has split into two such that one is now the inner of the other.
+  //It's possible that these polygons now wrap around other polygons, so check
+  //every polygon that's also contained by OuterOutRec's FirstLeft container
+  //(including 0) to see if they've become inner to the new inner polygon ...
+  OutRec* orfl = OuterOutRec->FirstLeft;
+  for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); ++i)
+  {
+    OutRec* outRec = m_PolyOuts[i];
+
+    if (!outRec->Pts || outRec == OuterOutRec || outRec == InnerOutRec)
+      continue;
+    OutRec* firstLeft = ParseFirstLeft(outRec->FirstLeft);
+    if (firstLeft != orfl && firstLeft != InnerOutRec && firstLeft != OuterOutRec)
+      continue;
+    if (Poly2ContainsPoly1(outRec->Pts, InnerOutRec->Pts))
+      outRec->FirstLeft = InnerOutRec;
+    else if (Poly2ContainsPoly1(outRec->Pts, OuterOutRec->Pts))
+      outRec->FirstLeft = OuterOutRec;
+    else if (outRec->FirstLeft == InnerOutRec || outRec->FirstLeft == OuterOutRec)
+      outRec->FirstLeft = orfl;
+  }
+}
+//----------------------------------------------------------------------
+void Clipper::FixupFirstLefts3(OutRec* OldOutRec, OutRec* NewOutRec)
+{
+  //reassigns FirstLeft WITHOUT testing if NewOutRec contains the polygon
+  for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); ++i)
+  {
+    OutRec* outRec = m_PolyOuts[i];
+    OutRec* firstLeft = ParseFirstLeft(outRec->FirstLeft);
+    if (outRec->Pts && firstLeft == OldOutRec)
+      outRec->FirstLeft = NewOutRec;
+  }
+}
+//----------------------------------------------------------------------
+
+void Clipper::JoinCommonEdges()
+{
+  for (JoinList::size_type i = 0; i < m_Joins.size(); i++)
+  {
+    Join* join = m_Joins[i];
+
+    OutRec *outRec1 = GetOutRec(join->OutPt1->Idx);
+    OutRec *outRec2 = GetOutRec(join->OutPt2->Idx);
+
+    if (!outRec1->Pts || !outRec2->Pts) continue;
+    if (outRec1->IsOpen || outRec2->IsOpen) continue;
+
+    //get the polygon fragment with the correct hole state (FirstLeft)
+    //before calling JoinPoints() ...
+    OutRec *holeStateRec;
+    if (outRec1 == outRec2) holeStateRec = outRec1;
+    else if (OutRec1RightOfOutRec2(outRec1, outRec2)) holeStateRec = outRec2;
+    else if (OutRec1RightOfOutRec2(outRec2, outRec1)) holeStateRec = outRec1;
+    else holeStateRec = GetLowermostRec(outRec1, outRec2);
+
+    if (!JoinPoints(join, outRec1, outRec2)) continue;
+
+    if (outRec1 == outRec2)
+    {
+      //instead of joining two polygons, we've just created a new one by
+      //splitting one polygon into two.
+      outRec1->Pts = join->OutPt1;
+      outRec1->BottomPt = 0;
+      outRec2 = CreateOutRec();
+      outRec2->Pts = join->OutPt2;
+
+      //update all OutRec2.Pts Idx's ...
+      UpdateOutPtIdxs(*outRec2);
+
+      if (Poly2ContainsPoly1(outRec2->Pts, outRec1->Pts))
+      {
+        //outRec1 contains outRec2 ...
+        outRec2->IsHole = !outRec1->IsHole;
+        outRec2->FirstLeft = outRec1;
+
+        if (m_UsingPolyTree) FixupFirstLefts2(outRec2, outRec1);
+
+        if ((outRec2->IsHole ^ m_ReverseOutput) == (Area(*outRec2) > 0))
+          ReversePolyPtLinks(outRec2->Pts);
+
+      } else if (Poly2ContainsPoly1(outRec1->Pts, outRec2->Pts))
+      {
+        //outRec2 contains outRec1 ...
+        outRec2->IsHole = outRec1->IsHole;
+        outRec1->IsHole = !outRec2->IsHole;
+        outRec2->FirstLeft = outRec1->FirstLeft;
+        outRec1->FirstLeft = outRec2;
+
+        if (m_UsingPolyTree) FixupFirstLefts2(outRec1, outRec2);
+
+        if ((outRec1->IsHole ^ m_ReverseOutput) == (Area(*outRec1) > 0))
+          ReversePolyPtLinks(outRec1->Pts);
+      }
+      else
+      {
+        //the 2 polygons are completely separate ...
+        outRec2->IsHole = outRec1->IsHole;
+        outRec2->FirstLeft = outRec1->FirstLeft;
+
+        //fixup FirstLeft pointers that may need reassigning to OutRec2
+        if (m_UsingPolyTree) FixupFirstLefts1(outRec1, outRec2);
+      }
+
+    } else
+    {
+      //joined 2 polygons together ...
+
+      outRec2->Pts = 0;
+      outRec2->BottomPt = 0;
+      outRec2->Idx = outRec1->Idx;
+
+      outRec1->IsHole = holeStateRec->IsHole;
+      if (holeStateRec == outRec2)
+        outRec1->FirstLeft = outRec2->FirstLeft;
+      outRec2->FirstLeft = outRec1;
+
+      if (m_UsingPolyTree) FixupFirstLefts3(outRec2, outRec1);
+    }
+  }
+}
+
+//------------------------------------------------------------------------------
+// ClipperOffset support functions ...
+//------------------------------------------------------------------------------
+
+DoublePoint GetUnitNormal(const IntPoint &pt1, const IntPoint &pt2)
+{
+  if(pt2.X == pt1.X && pt2.Y == pt1.Y)
+    return DoublePoint(0, 0);
+
+  double Dx = (double)(pt2.X - pt1.X);
+  double dy = (double)(pt2.Y - pt1.Y);
+  double f = 1 *1.0/ std::sqrt( Dx*Dx + dy*dy );
+  Dx *= f;
+  dy *= f;
+  return DoublePoint(dy, -Dx);
+}
+
+//------------------------------------------------------------------------------
+// ClipperOffset class
+//------------------------------------------------------------------------------
+
+ClipperOffset::ClipperOffset(double miterLimit, double arcTolerance)
+{
+  this->MiterLimit = miterLimit;
+  this->ArcTolerance = arcTolerance;
+  m_lowest.X = -1;
+}
+//------------------------------------------------------------------------------
+
+ClipperOffset::~ClipperOffset()
+{
+  Clear();
+}
+//------------------------------------------------------------------------------
+
+void ClipperOffset::Clear()
+{
+  for (int i = 0; i < m_polyNodes.ChildCount(); ++i)
+    delete m_polyNodes.Childs[i];
+  m_polyNodes.Childs.clear();
+  m_lowest.X = -1;
+}
+//------------------------------------------------------------------------------
+
+void ClipperOffset::AddPath(const Path& path, JoinType joinType, EndType endType)
+{
+  int highI = (int)path.size() - 1;
+  if (highI < 0) return;
+  PolyNode* newNode = new PolyNode();
+  newNode->m_jointype = joinType;
+  newNode->m_endtype = endType;
+
+  //strip duplicate points from path and also get index to the lowest point ...
+  if (endType == etClosedLine || endType == etClosedPolygon)
+    while (highI > 0 && path[0] == path[highI]) highI--;
+  newNode->Contour.reserve(highI + 1);
+  newNode->Contour.push_back(path[0]);
+  int j = 0, k = 0;
+  for (int i = 1; i <= highI; i++)
+    if (newNode->Contour[j] != path[i])
+    {
+      j++;
+      newNode->Contour.push_back(path[i]);
+      if (path[i].Y > newNode->Contour[k].Y ||
+        (path[i].Y == newNode->Contour[k].Y &&
+        path[i].X < newNode->Contour[k].X)) k = j;
+    }
+  if (endType == etClosedPolygon && j < 2)
+  {
+    delete newNode;
+    return;
+  }
+  m_polyNodes.AddChild(*newNode);
+
+  //if this path's lowest pt is lower than all the others then update m_lowest
+  if (endType != etClosedPolygon) return;
+  if (m_lowest.X < 0)
+    m_lowest = IntPoint(m_polyNodes.ChildCount() - 1, k);
+  else
+  {
+    IntPoint ip = m_polyNodes.Childs[(int)m_lowest.X]->Contour[(int)m_lowest.Y];
+    if (newNode->Contour[k].Y > ip.Y ||
+      (newNode->Contour[k].Y == ip.Y &&
+      newNode->Contour[k].X < ip.X))
+      m_lowest = IntPoint(m_polyNodes.ChildCount() - 1, k);
+  }
+}
+//------------------------------------------------------------------------------
+
+void ClipperOffset::AddPaths(const Paths& paths, JoinType joinType, EndType endType)
+{
+  for (Paths::size_type i = 0; i < paths.size(); ++i)
+    AddPath(paths[i], joinType, endType);
+}
+//------------------------------------------------------------------------------
+
+void ClipperOffset::FixOrientations()
+{
+  //fixup orientations of all closed paths if the orientation of the
+  //closed path with the lowermost vertex is wrong ...
+  if (m_lowest.X >= 0 &&
+    !Orientation(m_polyNodes.Childs[(int)m_lowest.X]->Contour))
+  {
+    for (int i = 0; i < m_polyNodes.ChildCount(); ++i)
+    {
+      PolyNode& node = *m_polyNodes.Childs[i];
+      if (node.m_endtype == etClosedPolygon ||
+        (node.m_endtype == etClosedLine && Orientation(node.Contour)))
+          ReversePath(node.Contour);
+    }
+  } else
+  {
+    for (int i = 0; i < m_polyNodes.ChildCount(); ++i)
+    {
+      PolyNode& node = *m_polyNodes.Childs[i];
+      if (node.m_endtype == etClosedLine && !Orientation(node.Contour))
+        ReversePath(node.Contour);
+    }
+  }
+}
+//------------------------------------------------------------------------------
+
+void ClipperOffset::Execute(Paths& solution, double delta)
+{
+  solution.clear();
+  FixOrientations();
+  DoOffset(delta);
+
+  //now clean up 'corners' ...
+  Clipper clpr;
+  clpr.AddPaths(m_destPolys, ptSubject, true);
+  if (delta > 0)
+  {
+    clpr.Execute(ctUnion, solution, pftPositive, pftPositive);
+  }
+  else
+  {
+    IntRect r = clpr.GetBounds();
+    Path outer(4);
+    outer[0] = IntPoint(r.left - 10, r.bottom + 10);
+    outer[1] = IntPoint(r.right + 10, r.bottom + 10);
+    outer[2] = IntPoint(r.right + 10, r.top - 10);
+    outer[3] = IntPoint(r.left - 10, r.top - 10);
+
+    clpr.AddPath(outer, ptSubject, true);
+    clpr.ReverseSolution(true);
+    clpr.Execute(ctUnion, solution, pftNegative, pftNegative);
+    if (solution.size() > 0) solution.erase(solution.begin());
+  }
+}
+//------------------------------------------------------------------------------
+
+void ClipperOffset::Execute(PolyTree& solution, double delta)
+{
+  solution.Clear();
+  FixOrientations();
+  DoOffset(delta);
+
+  //now clean up 'corners' ...
+  Clipper clpr;
+  clpr.AddPaths(m_destPolys, ptSubject, true);
+  if (delta > 0)
+  {
+    clpr.Execute(ctUnion, solution, pftPositive, pftPositive);
+  }
+  else
+  {
+    IntRect r = clpr.GetBounds();
+    Path outer(4);
+    outer[0] = IntPoint(r.left - 10, r.bottom + 10);
+    outer[1] = IntPoint(r.right + 10, r.bottom + 10);
+    outer[2] = IntPoint(r.right + 10, r.top - 10);
+    outer[3] = IntPoint(r.left - 10, r.top - 10);
+
+    clpr.AddPath(outer, ptSubject, true);
+    clpr.ReverseSolution(true);
+    clpr.Execute(ctUnion, solution, pftNegative, pftNegative);
+    //remove the outer PolyNode rectangle ...
+    if (solution.ChildCount() == 1 && solution.Childs[0]->ChildCount() > 0)
+    {
+      PolyNode* outerNode = solution.Childs[0];
+      solution.Childs.reserve(outerNode->ChildCount());
+      solution.Childs[0] = outerNode->Childs[0];
+      solution.Childs[0]->Parent = outerNode->Parent;
+      for (int i = 1; i < outerNode->ChildCount(); ++i)
+        solution.AddChild(*outerNode->Childs[i]);
+    }
+    else
+      solution.Clear();
+  }
+}
+//------------------------------------------------------------------------------
+
+void ClipperOffset::DoOffset(double delta)
+{
+  m_destPolys.clear();
+  m_delta = delta;
+
+  //if Zero offset, just copy any CLOSED polygons to m_p and return ...
+  if (NEAR_ZERO(delta))
+  {
+    m_destPolys.reserve(m_polyNodes.ChildCount());
+    for (int i = 0; i < m_polyNodes.ChildCount(); i++)
+    {
+      PolyNode& node = *m_polyNodes.Childs[i];
+      if (node.m_endtype == etClosedPolygon)
+        m_destPolys.push_back(node.Contour);
+    }
+    return;
+  }
+
+  //see offset_triginometry3.svg in the documentation folder ...
+  if (MiterLimit > 2) m_miterLim = 2/(MiterLimit * MiterLimit);
+  else m_miterLim = 0.5;
+
+  double y;
+  if (ArcTolerance <= 0.0) y = def_arc_tolerance;
+  else if (ArcTolerance > std::fabs(delta) * def_arc_tolerance)
+    y = std::fabs(delta) * def_arc_tolerance;
+  else y = ArcTolerance;
+  //see offset_triginometry2.svg in the documentation folder ...
+  double steps = pi / std::acos(1 - y / std::fabs(delta));
+  if (steps > std::fabs(delta) * pi)
+    steps = std::fabs(delta) * pi;  //ie excessive precision check
+  m_sin = std::sin(two_pi / steps);
+  m_cos = std::cos(two_pi / steps);
+  m_StepsPerRad = steps / two_pi;
+  if (delta < 0.0) m_sin = -m_sin;
+
+  m_destPolys.reserve(m_polyNodes.ChildCount() * 2);
+  for (int i = 0; i < m_polyNodes.ChildCount(); i++)
+  {
+    PolyNode& node = *m_polyNodes.Childs[i];
+    m_srcPoly = node.Contour;
+
+    int len = (int)m_srcPoly.size();
+    if (len == 0 || (delta <= 0 && (len < 3 || node.m_endtype != etClosedPolygon)))
+        continue;
+
+    m_destPoly.clear();
+    if (len == 1)
+    {
+      if (node.m_jointype == jtRound)
+      {
+        double X = 1.0, Y = 0.0;
+        for (cInt j = 1; j <= steps; j++)
+        {
+          m_destPoly.push_back(IntPoint(
+            Round(m_srcPoly[0].X + X * delta),
+            Round(m_srcPoly[0].Y + Y * delta)));
+          double X2 = X;
+          X = X * m_cos - m_sin * Y;
+          Y = X2 * m_sin + Y * m_cos;
+        }
+      }
+      else
+      {
+        double X = -1.0, Y = -1.0;
+        for (int j = 0; j < 4; ++j)
+        {
+          m_destPoly.push_back(IntPoint(
+            Round(m_srcPoly[0].X + X * delta),
+            Round(m_srcPoly[0].Y + Y * delta)));
+          if (X < 0) X = 1;
+          else if (Y < 0) Y = 1;
+          else X = -1;
+        }
+      }
+      m_destPolys.push_back(m_destPoly);
+      continue;
+    }
+    //build m_normals ...
+    m_normals.clear();
+    m_normals.reserve(len);
+    for (int j = 0; j < len - 1; ++j)
+      m_normals.push_back(GetUnitNormal(m_srcPoly[j], m_srcPoly[j + 1]));
+    if (node.m_endtype == etClosedLine || node.m_endtype == etClosedPolygon)
+      m_normals.push_back(GetUnitNormal(m_srcPoly[len - 1], m_srcPoly[0]));
+    else
+      m_normals.push_back(DoublePoint(m_normals[len - 2]));
+
+    if (node.m_endtype == etClosedPolygon)
+    {
+      int k = len - 1;
+      for (int j = 0; j < len; ++j)
+        OffsetPoint(j, k, node.m_jointype);
+      m_destPolys.push_back(m_destPoly);
+    }
+    else if (node.m_endtype == etClosedLine)
+    {
+      int k = len - 1;
+      for (int j = 0; j < len; ++j)
+        OffsetPoint(j, k, node.m_jointype);
+      m_destPolys.push_back(m_destPoly);
+      m_destPoly.clear();
+      //re-build m_normals ...
+      DoublePoint n = m_normals[len -1];
+      for (int j = len - 1; j > 0; j--)
+        m_normals[j] = DoublePoint(-m_normals[j - 1].X, -m_normals[j - 1].Y);
+      m_normals[0] = DoublePoint(-n.X, -n.Y);
+      k = 0;
+      for (int j = len - 1; j >= 0; j--)
+        OffsetPoint(j, k, node.m_jointype);
+      m_destPolys.push_back(m_destPoly);
+    }
+    else
+    {
+      int k = 0;
+      for (int j = 1; j < len - 1; ++j)
+        OffsetPoint(j, k, node.m_jointype);
+
+      IntPoint pt1;
+      if (node.m_endtype == etOpenButt)
+      {
+        int j = len - 1;
+        pt1 = IntPoint((cInt)Round(m_srcPoly[j].X + m_normals[j].X *
+          delta), (cInt)Round(m_srcPoly[j].Y + m_normals[j].Y * delta));
+        m_destPoly.push_back(pt1);
+        pt1 = IntPoint((cInt)Round(m_srcPoly[j].X - m_normals[j].X *
+          delta), (cInt)Round(m_srcPoly[j].Y - m_normals[j].Y * delta));
+        m_destPoly.push_back(pt1);
+      }
+      else
+      {
+        int j = len - 1;
+        k = len - 2;
+        m_sinA = 0;
+        m_normals[j] = DoublePoint(-m_normals[j].X, -m_normals[j].Y);
+        if (node.m_endtype == etOpenSquare)
+          DoSquare(j, k);
+        else
+          DoRound(j, k);
+      }
+
+      //re-build m_normals ...
+      for (int j = len - 1; j > 0; j--)
+        m_normals[j] = DoublePoint(-m_normals[j - 1].X, -m_normals[j - 1].Y);
+      m_normals[0] = DoublePoint(-m_normals[1].X, -m_normals[1].Y);
+
+      k = len - 1;
+      for (int j = k - 1; j > 0; --j) OffsetPoint(j, k, node.m_jointype);
+
+      if (node.m_endtype == etOpenButt)
+      {
+        pt1 = IntPoint((cInt)Round(m_srcPoly[0].X - m_normals[0].X * delta),
+          (cInt)Round(m_srcPoly[0].Y - m_normals[0].Y * delta));
+        m_destPoly.push_back(pt1);
+        pt1 = IntPoint((cInt)Round(m_srcPoly[0].X + m_normals[0].X * delta),
+          (cInt)Round(m_srcPoly[0].Y + m_normals[0].Y * delta));
+        m_destPoly.push_back(pt1);
+      }
+      else
+      {
+        k = 1;
+        m_sinA = 0;
+        if (node.m_endtype == etOpenSquare)
+          DoSquare(0, 1);
+        else
+          DoRound(0, 1);
+      }
+      m_destPolys.push_back(m_destPoly);
+    }
+  }
+}
+//------------------------------------------------------------------------------
+
+void ClipperOffset::OffsetPoint(int j, int& k, JoinType jointype)
+{
+  //cross product ...
+  m_sinA = (m_normals[k].X * m_normals[j].Y - m_normals[j].X * m_normals[k].Y);
+  if (std::fabs(m_sinA * m_delta) < 1.0)
+  {
+    //dot product ...
+    double cosA = (m_normals[k].X * m_normals[j].X + m_normals[j].Y * m_normals[k].Y );
+    if (cosA > 0) // angle => 0 degrees
+    {
+      m_destPoly.push_back(IntPoint(Round(m_srcPoly[j].X + m_normals[k].X * m_delta),
+        Round(m_srcPoly[j].Y + m_normals[k].Y * m_delta)));
+      return;
+    }
+    //else angle => 180 degrees
+  }
+  else if (m_sinA > 1.0) m_sinA = 1.0;
+  else if (m_sinA < -1.0) m_sinA = -1.0;
+
+  if (m_sinA * m_delta < 0)
+  {
+    m_destPoly.push_back(IntPoint(Round(m_srcPoly[j].X + m_normals[k].X * m_delta),
+      Round(m_srcPoly[j].Y + m_normals[k].Y * m_delta)));
+    m_destPoly.push_back(m_srcPoly[j]);
+    m_destPoly.push_back(IntPoint(Round(m_srcPoly[j].X + m_normals[j].X * m_delta),
+      Round(m_srcPoly[j].Y + m_normals[j].Y * m_delta)));
+  }
+  else
+    switch (jointype)
+    {
+      case jtMiter:
+        {
+          double r = 1 + (m_normals[j].X * m_normals[k].X +
+            m_normals[j].Y * m_normals[k].Y);
+          if (r >= m_miterLim) DoMiter(j, k, r); else DoSquare(j, k);
+          break;
+        }
+      case jtSquare: DoSquare(j, k); break;
+      case jtRound: DoRound(j, k); break;
+    }
+  k = j;
+}
+//------------------------------------------------------------------------------
+
+void ClipperOffset::DoSquare(int j, int k)
+{
+  double dx = std::tan(std::atan2(m_sinA,
+      m_normals[k].X * m_normals[j].X + m_normals[k].Y * m_normals[j].Y) / 4);
+  m_destPoly.push_back(IntPoint(
+      Round(m_srcPoly[j].X + m_delta * (m_normals[k].X - m_normals[k].Y * dx)),
+      Round(m_srcPoly[j].Y + m_delta * (m_normals[k].Y + m_normals[k].X * dx))));
+  m_destPoly.push_back(IntPoint(
+      Round(m_srcPoly[j].X + m_delta * (m_normals[j].X + m_normals[j].Y * dx)),
+      Round(m_srcPoly[j].Y + m_delta * (m_normals[j].Y - m_normals[j].X * dx))));
+}
+//------------------------------------------------------------------------------
+
+void ClipperOffset::DoMiter(int j, int k, double r)
+{
+  double q = m_delta / r;
+  m_destPoly.push_back(IntPoint(Round(m_srcPoly[j].X + (m_normals[k].X + m_normals[j].X) * q),
+      Round(m_srcPoly[j].Y + (m_normals[k].Y + m_normals[j].Y) * q)));
+}
+//------------------------------------------------------------------------------
+
+void ClipperOffset::DoRound(int j, int k)
+{
+  double a = std::atan2(m_sinA,
+  m_normals[k].X * m_normals[j].X + m_normals[k].Y * m_normals[j].Y);
+  int steps = std::max((int)Round(m_StepsPerRad * std::fabs(a)), 1);
+
+  double X = m_normals[k].X, Y = m_normals[k].Y, X2;
+  for (int i = 0; i < steps; ++i)
+  {
+    m_destPoly.push_back(IntPoint(
+        Round(m_srcPoly[j].X + X * m_delta),
+        Round(m_srcPoly[j].Y + Y * m_delta)));
+    X2 = X;
+    X = X * m_cos - m_sin * Y;
+    Y = X2 * m_sin + Y * m_cos;
+  }
+  m_destPoly.push_back(IntPoint(
+  Round(m_srcPoly[j].X + m_normals[j].X * m_delta),
+  Round(m_srcPoly[j].Y + m_normals[j].Y * m_delta)));
+}
+
+//------------------------------------------------------------------------------
+// Miscellaneous public functions
+//------------------------------------------------------------------------------
+
+void Clipper::DoSimplePolygons()
+{
+  PolyOutList::size_type i = 0;
+  while (i < m_PolyOuts.size())
+  {
+    OutRec* outrec = m_PolyOuts[i++];
+    OutPt* op = outrec->Pts;
+    if (!op || outrec->IsOpen) continue;
+    do //for each Pt in Polygon until duplicate found do ...
+    {
+      OutPt* op2 = op->Next;
+      while (op2 != outrec->Pts)
+      {
+        if ((op->Pt == op2->Pt) && op2->Next != op && op2->Prev != op)
+        {
+          //split the polygon into two ...
+          OutPt* op3 = op->Prev;
+          OutPt* op4 = op2->Prev;
+          op->Prev = op4;
+          op4->Next = op;
+          op2->Prev = op3;
+          op3->Next = op2;
+
+          outrec->Pts = op;
+          OutRec* outrec2 = CreateOutRec();
+          outrec2->Pts = op2;
+          UpdateOutPtIdxs(*outrec2);
+          if (Poly2ContainsPoly1(outrec2->Pts, outrec->Pts))
+          {
+            //OutRec2 is contained by OutRec1 ...
+            outrec2->IsHole = !outrec->IsHole;
+            outrec2->FirstLeft = outrec;
+            if (m_UsingPolyTree) FixupFirstLefts2(outrec2, outrec);
+          }
+          else
+            if (Poly2ContainsPoly1(outrec->Pts, outrec2->Pts))
+          {
+            //OutRec1 is contained by OutRec2 ...
+            outrec2->IsHole = outrec->IsHole;
+            outrec->IsHole = !outrec2->IsHole;
+            outrec2->FirstLeft = outrec->FirstLeft;
+            outrec->FirstLeft = outrec2;
+            if (m_UsingPolyTree) FixupFirstLefts2(outrec, outrec2);
+            }
+            else
+          {
+            //the 2 polygons are separate ...
+            outrec2->IsHole = outrec->IsHole;
+            outrec2->FirstLeft = outrec->FirstLeft;
+            if (m_UsingPolyTree) FixupFirstLefts1(outrec, outrec2);
+            }
+          op2 = op; //ie get ready for the Next iteration
+        }
+        op2 = op2->Next;
+      }
+      op = op->Next;
+    }
+    while (op != outrec->Pts);
+  }
+}
+//------------------------------------------------------------------------------
+
+void ReversePath(Path& p)
+{
+  std::reverse(p.begin(), p.end());
+}
+//------------------------------------------------------------------------------
+
+void ReversePaths(Paths& p)
+{
+  for (Paths::size_type i = 0; i < p.size(); ++i)
+    ReversePath(p[i]);
+}
+//------------------------------------------------------------------------------
+
+void SimplifyPolygon(const Path &in_poly, Paths &out_polys, PolyFillType fillType)
+{
+  Clipper c;
+  c.StrictlySimple(true);
+  c.AddPath(in_poly, ptSubject, true);
+  c.Execute(ctUnion, out_polys, fillType, fillType);
+}
+//------------------------------------------------------------------------------
+
+void SimplifyPolygons(const Paths &in_polys, Paths &out_polys, PolyFillType fillType)
+{
+  Clipper c;
+  c.StrictlySimple(true);
+  c.AddPaths(in_polys, ptSubject, true);
+  c.Execute(ctUnion, out_polys, fillType, fillType);
+}
+//------------------------------------------------------------------------------
+
+void SimplifyPolygons(Paths &polys, PolyFillType fillType)
+{
+  SimplifyPolygons(polys, polys, fillType);
+}
+//------------------------------------------------------------------------------
+
+inline double DistanceSqrd(const IntPoint& pt1, const IntPoint& pt2)
+{
+  double Dx = ((double)pt1.X - pt2.X);
+  double dy = ((double)pt1.Y - pt2.Y);
+  return (Dx*Dx + dy*dy);
+}
+//------------------------------------------------------------------------------
+
+double DistanceFromLineSqrd(
+  const IntPoint& pt, const IntPoint& ln1, const IntPoint& ln2)
+{
+  //The equation of a line in general form (Ax + By + C = 0)
+  //given 2 points (x,y) & (x,y) is ...
+  //(y - y)x + (x - x)y + (y - y)x - (x - x)y = 0
+  //A = (y - y); B = (x - x); C = (y - y)x - (x - x)y
+  //perpendicular distance of point (x,y) = (Ax + By + C)/Sqrt(A + B)
+  //see http://en.wikipedia.org/wiki/Perpendicular_distance
+  double A = double(ln1.Y - ln2.Y);
+  double B = double(ln2.X - ln1.X);
+  double C = A * ln1.X  + B * ln1.Y;
+  C = A * pt.X + B * pt.Y - C;
+  return (C * C) / (A * A + B * B);
+}
+//---------------------------------------------------------------------------
+
+bool SlopesNearCollinear(const IntPoint& pt1,
+    const IntPoint& pt2, const IntPoint& pt3, double distSqrd)
+{
+  //this function is more accurate when the point that's geometrically
+  //between the other 2 points is the one that's tested for distance.
+  //ie makes it more likely to pick up 'spikes' ...
+	if (Abs(pt1.X - pt2.X) > Abs(pt1.Y - pt2.Y))
+	{
+    if ((pt1.X > pt2.X) == (pt1.X < pt3.X))
+      return DistanceFromLineSqrd(pt1, pt2, pt3) < distSqrd;
+    else if ((pt2.X > pt1.X) == (pt2.X < pt3.X))
+      return DistanceFromLineSqrd(pt2, pt1, pt3) < distSqrd;
+		else
+	    return DistanceFromLineSqrd(pt3, pt1, pt2) < distSqrd;
+	}
+	else
+	{
+    if ((pt1.Y > pt2.Y) == (pt1.Y < pt3.Y))
+      return DistanceFromLineSqrd(pt1, pt2, pt3) < distSqrd;
+    else if ((pt2.Y > pt1.Y) == (pt2.Y < pt3.Y))
+      return DistanceFromLineSqrd(pt2, pt1, pt3) < distSqrd;
+		else
+      return DistanceFromLineSqrd(pt3, pt1, pt2) < distSqrd;
+	}
+}
+//------------------------------------------------------------------------------
+
+bool PointsAreClose(IntPoint pt1, IntPoint pt2, double distSqrd)
+{
+    double Dx = (double)pt1.X - pt2.X;
+    double dy = (double)pt1.Y - pt2.Y;
+    return ((Dx * Dx) + (dy * dy) <= distSqrd);
+}
+//------------------------------------------------------------------------------
+
+OutPt* ExcludeOp(OutPt* op)
+{
+  OutPt* result = op->Prev;
+  result->Next = op->Next;
+  op->Next->Prev = result;
+  result->Idx = 0;
+  return result;
+}
+//------------------------------------------------------------------------------
+
+void CleanPolygon(const Path& in_poly, Path& out_poly, double distance)
+{
+  //distance = proximity in units/pixels below which vertices
+  //will be stripped. Default ~= sqrt(2).
+
+  size_t size = in_poly.size();
+
+  if (size == 0)
+  {
+    out_poly.clear();
+    return;
+  }
+
+  OutPt* outPts = new OutPt[size];
+  for (size_t i = 0; i < size; ++i)
+  {
+    outPts[i].Pt = in_poly[i];
+    outPts[i].Next = &outPts[(i + 1) % size];
+    outPts[i].Next->Prev = &outPts[i];
+    outPts[i].Idx = 0;
+  }
+
+  double distSqrd = distance * distance;
+  OutPt* op = &outPts[0];
+  while (op->Idx == 0 && op->Next != op->Prev)
+  {
+    if (PointsAreClose(op->Pt, op->Prev->Pt, distSqrd))
+    {
+      op = ExcludeOp(op);
+      size--;
+    }
+    else if (PointsAreClose(op->Prev->Pt, op->Next->Pt, distSqrd))
+    {
+      ExcludeOp(op->Next);
+      op = ExcludeOp(op);
+      size -= 2;
+    }
+    else if (SlopesNearCollinear(op->Prev->Pt, op->Pt, op->Next->Pt, distSqrd))
+    {
+      op = ExcludeOp(op);
+      size--;
+    }
+    else
+    {
+      op->Idx = 1;
+      op = op->Next;
+    }
+  }
+
+  if (size < 3) size = 0;
+  out_poly.resize(size);
+  for (size_t i = 0; i < size; ++i)
+  {
+    out_poly[i] = op->Pt;
+    op = op->Next;
+  }
+  delete [] outPts;
+}
+//------------------------------------------------------------------------------
+
+void CleanPolygon(Path& poly, double distance)
+{
+  CleanPolygon(poly, poly, distance);
+}
+//------------------------------------------------------------------------------
+
+void CleanPolygons(const Paths& in_polys, Paths& out_polys, double distance)
+{
+  out_polys.resize(in_polys.size());
+  for (Paths::size_type i = 0; i < in_polys.size(); ++i)
+    CleanPolygon(in_polys[i], out_polys[i], distance);
+}
+//------------------------------------------------------------------------------
+
+void CleanPolygons(Paths& polys, double distance)
+{
+  CleanPolygons(polys, polys, distance);
+}
+//------------------------------------------------------------------------------
+
+void Minkowski(const Path& poly, const Path& path,
+  Paths& solution, bool isSum, bool isClosed)
+{
+  int delta = (isClosed ? 1 : 0);
+  size_t polyCnt = poly.size();
+  size_t pathCnt = path.size();
+  Paths pp;
+  pp.reserve(pathCnt);
+  if (isSum)
+    for (size_t i = 0; i < pathCnt; ++i)
+    {
+      Path p;
+      p.reserve(polyCnt);
+      for (size_t j = 0; j < poly.size(); ++j)
+        p.push_back(IntPoint(path[i].X + poly[j].X, path[i].Y + poly[j].Y));
+      pp.push_back(p);
+    }
+  else
+    for (size_t i = 0; i < pathCnt; ++i)
+    {
+      Path p;
+      p.reserve(polyCnt);
+      for (size_t j = 0; j < poly.size(); ++j)
+        p.push_back(IntPoint(path[i].X - poly[j].X, path[i].Y - poly[j].Y));
+      pp.push_back(p);
+    }
+
+  solution.clear();
+  solution.reserve((pathCnt + delta) * (polyCnt + 1));
+  for (size_t i = 0; i < pathCnt - 1 + delta; ++i)
+    for (size_t j = 0; j < polyCnt; ++j)
+    {
+      Path quad;
+      quad.reserve(4);
+      quad.push_back(pp[i % pathCnt][j % polyCnt]);
+      quad.push_back(pp[(i + 1) % pathCnt][j % polyCnt]);
+      quad.push_back(pp[(i + 1) % pathCnt][(j + 1) % polyCnt]);
+      quad.push_back(pp[i % pathCnt][(j + 1) % polyCnt]);
+      if (!Orientation(quad)) ReversePath(quad);
+      solution.push_back(quad);
+    }
+}
+//------------------------------------------------------------------------------
+
+void MinkowskiSum(const Path& pattern, const Path& path, Paths& solution, bool pathIsClosed)
+{
+  Minkowski(pattern, path, solution, true, pathIsClosed);
+  Clipper c;
+  c.AddPaths(solution, ptSubject, true);
+  c.Execute(ctUnion, solution, pftNonZero, pftNonZero);
+}
+//------------------------------------------------------------------------------
+
+void TranslatePath(const Path& input, Path& output, const IntPoint delta)
+{
+  //precondition: input != output
+  output.resize(input.size());
+  for (size_t i = 0; i < input.size(); ++i)
+    output[i] = IntPoint(input[i].X + delta.X, input[i].Y + delta.Y);
+}
+//------------------------------------------------------------------------------
+
+void MinkowskiSum(const Path& pattern, const Paths& paths, Paths& solution, bool pathIsClosed)
+{
+  Clipper c;
+  for (size_t i = 0; i < paths.size(); ++i)
+  {
+    Paths tmp;
+    Minkowski(pattern, paths[i], tmp, true, pathIsClosed);
+    c.AddPaths(tmp, ptSubject, true);
+    if (pathIsClosed)
+    {
+      Path tmp2;
+      TranslatePath(paths[i], tmp2, pattern[0]);
+      c.AddPath(tmp2, ptClip, true);
+    }
+  }
+    c.Execute(ctUnion, solution, pftNonZero, pftNonZero);
+}
+//------------------------------------------------------------------------------
+
+void MinkowskiDiff(const Path& poly1, const Path& poly2, Paths& solution)
+{
+  Minkowski(poly1, poly2, solution, false, true);
+  Clipper c;
+  c.AddPaths(solution, ptSubject, true);
+  c.Execute(ctUnion, solution, pftNonZero, pftNonZero);
+}
+//------------------------------------------------------------------------------
+
+enum NodeType {ntAny, ntOpen, ntClosed};
+
+void AddPolyNodeToPaths(const PolyNode& polynode, NodeType nodetype, Paths& paths)
+{
+  bool match = true;
+  if (nodetype == ntClosed) match = !polynode.IsOpen();
+  else if (nodetype == ntOpen) return;
+
+  if (!polynode.Contour.empty() && match)
+    paths.push_back(polynode.Contour);
+  for (int i = 0; i < polynode.ChildCount(); ++i)
+    AddPolyNodeToPaths(*polynode.Childs[i], nodetype, paths);
+}
+//------------------------------------------------------------------------------
+
+void PolyTreeToPaths(const PolyTree& polytree, Paths& paths)
+{
+  paths.resize(0);
+  paths.reserve(polytree.Total());
+  AddPolyNodeToPaths(polytree, ntAny, paths);
+}
+//------------------------------------------------------------------------------
+
+void ClosedPathsFromPolyTree(const PolyTree& polytree, Paths& paths)
+{
+  paths.resize(0);
+  paths.reserve(polytree.Total());
+  AddPolyNodeToPaths(polytree, ntClosed, paths);
+}
+//------------------------------------------------------------------------------
+
+void OpenPathsFromPolyTree(PolyTree& polytree, Paths& paths)
+{
+  paths.resize(0);
+  paths.reserve(polytree.Total());
+  //Open paths are top level only, so ...
+  for (int i = 0; i < polytree.ChildCount(); ++i)
+    if (polytree.Childs[i]->IsOpen())
+      paths.push_back(polytree.Childs[i]->Contour);
+}
+//------------------------------------------------------------------------------
+
+std::ostream& operator <<(std::ostream &s, const IntPoint &p)
+{
+  s << "(" << p.X << "," << p.Y << ")";
+  return s;
+}
+//------------------------------------------------------------------------------
+
+std::ostream& operator <<(std::ostream &s, const Path &p)
+{
+  if (p.empty()) return s;
+  Path::size_type last = p.size() -1;
+  for (Path::size_type i = 0; i < last; i++)
+    s << "(" << p[i].X << "," << p[i].Y << "), ";
+  s << "(" << p[last].X << "," << p[last].Y << ")\n";
+  return s;
+}
+//------------------------------------------------------------------------------
+
+std::ostream& operator <<(std::ostream &s, const Paths &p)
+{
+  for (Paths::size_type i = 0; i < p.size(); i++)
+    s << p[i];
+  s << "\n";
+  return s;
+}
+//------------------------------------------------------------------------------
+
+} //ClipperLib namespace
--- a/third_party/clipper/clipper.hpp
+++ b/third_party/clipper/clipper.hpp
+/*******************************************************************************
+*                                                                              *
+* Author    :  Angus Johnson                                                   *
+* Version   :  6.4.2                                                           *
+* Date      :  27 February 2017                                                *
+* Website   :  http://www.angusj.com                                           *
+* Copyright :  Angus Johnson 2010-2017                                         *
+*                                                                              *
+* License:                                                                     *
+* Use, modification & distribution is subject to Boost Software License Ver 1. *
+* http://www.boost.org/LICENSE_1_0.txt                                         *
+*                                                                              *
+* Attributions:                                                                *
+* The code in this library is an extension of Bala Vatti's clipping algorithm: *
+* "A generic solution to polygon clipping"                                     *
+* Communications of the ACM, Vol 35, Issue 7 (July 1992) pp 56-63.             *
+* http://portal.acm.org/citation.cfm?id=129906                                 *
+*                                                                              *
+* Computer graphics and geometric modeling: implementation and algorithms      *
+* By Max K. Agoston                                                            *
+* Springer; 1 edition (January 4, 2005)                                        *
+* http://books.google.com/books?q=vatti+clipping+agoston                       *
+*                                                                              *
+* See also:                                                                    *
+* "Polygon Offsetting by Computing Winding Numbers"                            *
+* Paper no. DETC2005-85513 pp. 565-575                                         *
+* ASME 2005 International Design Engineering Technical Conferences             *
+* and Computers and Information in Engineering Conference (IDETC/CIE2005)      *
+* September 24-28, 2005 , Long Beach, California, USA                          *
+* http://www.me.berkeley.edu/~mcmains/pubs/DAC05OffsetPolygon.pdf              *
+*                                                                              *
+*******************************************************************************/
+
+#ifndef clipper_hpp
+#define clipper_hpp
+
+#define CLIPPER_VERSION "6.4.2"
+
+//use_int32: When enabled 32bit ints are used instead of 64bit ints. This
+//improve performance but coordinate values are limited to the range +/- 46340
+//#define use_int32
+
+//use_xyz: adds a Z member to IntPoint. Adds a minor cost to perfomance.
+//#define use_xyz
+
+//use_lines: Enables line clipping. Adds a very minor cost to performance.
+#define use_lines
+
+//use_deprecated: Enables temporary support for the obsolete functions
+//#define use_deprecated
+
+#include <vector>
+#include <list>
+#include <set>
+#include <stdexcept>
+#include <cstring>
+#include <cstdlib>
+#include <ostream>
+#include <functional>
+#include <queue>
+
+namespace ClipperLib {
+
+enum ClipType { ctIntersection, ctUnion, ctDifference, ctXor };
+enum PolyType { ptSubject, ptClip };
+//By far the most widely used winding rules for polygon filling are
+//EvenOdd & NonZero (GDI, GDI+, XLib, OpenGL, Cairo, AGG, Quartz, SVG, Gr32)
+//Others rules include Positive, Negative and ABS_GTR_EQ_TWO (only in OpenGL)
+//see http://glprogramming.com/red/chapter11.html
+enum PolyFillType { pftEvenOdd, pftNonZero, pftPositive, pftNegative };
+
+#ifdef use_int32
+  typedef int cInt;
+  static cInt const loRange = 0x7FFF;
+  static cInt const hiRange = 0x7FFF;
+#else
+  typedef signed long long cInt;
+  static cInt const loRange = 0x3FFFFFFF;
+  static cInt const hiRange = 0x3FFFFFFFFFFFFFFFLL;
+  typedef signed long long long64;     //used by Int128 class
+  typedef unsigned long long ulong64;
+
+#endif
+
+struct IntPoint {
+  cInt X;
+  cInt Y;
+#ifdef use_xyz
+  cInt Z;
+  IntPoint(cInt x = 0, cInt y = 0, cInt z = 0): X(x), Y(y), Z(z) {};
+#else
+  IntPoint(cInt x = 0, cInt y = 0): X(x), Y(y) {};
+#endif
+
+  friend inline bool operator== (const IntPoint& a, const IntPoint& b)
+  {
+    return a.X == b.X && a.Y == b.Y;
+  }
+  friend inline bool operator!= (const IntPoint& a, const IntPoint& b)
+  {
+    return a.X != b.X  || a.Y != b.Y;
+  }
+};
+//------------------------------------------------------------------------------
+
+typedef std::vector< IntPoint > Path;
+typedef std::vector< Path > Paths;
+
+inline Path& operator <<(Path& poly, const IntPoint& p) {poly.push_back(p); return poly;}
+inline Paths& operator <<(Paths& polys, const Path& p) {polys.push_back(p); return polys;}
+
+std::ostream& operator <<(std::ostream &s, const IntPoint &p);
+std::ostream& operator <<(std::ostream &s, const Path &p);
+std::ostream& operator <<(std::ostream &s, const Paths &p);
+
+struct DoublePoint
+{
+  double X;
+  double Y;
+  DoublePoint(double x = 0, double y = 0) : X(x), Y(y) {}
+  DoublePoint(IntPoint ip) : X((double)ip.X), Y((double)ip.Y) {}
+};
+//------------------------------------------------------------------------------
+
+#ifdef use_xyz
+typedef void (*ZFillCallback)(IntPoint& e1bot, IntPoint& e1top, IntPoint& e2bot, IntPoint& e2top, IntPoint& pt);
+#endif
+
+enum InitOptions {ioReverseSolution = 1, ioStrictlySimple = 2, ioPreserveCollinear = 4};
+enum JoinType {jtSquare, jtRound, jtMiter};
+enum EndType {etClosedPolygon, etClosedLine, etOpenButt, etOpenSquare, etOpenRound};
+
+class PolyNode;
+typedef std::vector< PolyNode* > PolyNodes;
+
+class PolyNode
+{
+public:
+    PolyNode();
+    virtual ~PolyNode(){};
+    Path Contour;
+    PolyNodes Childs;
+    PolyNode* Parent;
+    PolyNode* GetNext() const;
+    bool IsHole() const;
+    bool IsOpen() const;
+    int ChildCount() const;
+private:
+    //PolyNode& operator =(PolyNode& other);
+    unsigned Index; //node index in Parent.Childs
+    bool m_IsOpen;
+    JoinType m_jointype;
+    EndType m_endtype;
+    PolyNode* GetNextSiblingUp() const;
+    void AddChild(PolyNode& child);
+    friend class Clipper; //to access Index
+    friend class ClipperOffset;
+};
+
+class PolyTree: public PolyNode
+{
+public:
+    ~PolyTree(){ Clear(); };
+    PolyNode* GetFirst() const;
+    void Clear();
+    int Total() const;
+private:
+  //PolyTree& operator =(PolyTree& other);
+  PolyNodes AllNodes;
+    friend class Clipper; //to access AllNodes
+};
+
+bool Orientation(const Path &poly);
+double Area(const Path &poly);
+int PointInPolygon(const IntPoint &pt, const Path &path);
+
+void SimplifyPolygon(const Path &in_poly, Paths &out_polys, PolyFillType fillType = pftEvenOdd);
+void SimplifyPolygons(const Paths &in_polys, Paths &out_polys, PolyFillType fillType = pftEvenOdd);
+void SimplifyPolygons(Paths &polys, PolyFillType fillType = pftEvenOdd);
+
+void CleanPolygon(const Path& in_poly, Path& out_poly, double distance = 1.415);
+void CleanPolygon(Path& poly, double distance = 1.415);
+void CleanPolygons(const Paths& in_polys, Paths& out_polys, double distance = 1.415);
+void CleanPolygons(Paths& polys, double distance = 1.415);
+
+void MinkowskiSum(const Path& pattern, const Path& path, Paths& solution, bool pathIsClosed);
+void MinkowskiSum(const Path& pattern, const Paths& paths, Paths& solution, bool pathIsClosed);
+void MinkowskiDiff(const Path& poly1, const Path& poly2, Paths& solution);
+
+void PolyTreeToPaths(const PolyTree& polytree, Paths& paths);
+void ClosedPathsFromPolyTree(const PolyTree& polytree, Paths& paths);
+void OpenPathsFromPolyTree(PolyTree& polytree, Paths& paths);
+
+void ReversePath(Path& p);
+void ReversePaths(Paths& p);
+
+struct IntRect { cInt left; cInt top; cInt right; cInt bottom; };
+
+//enums that are used internally ...
+enum EdgeSide { esLeft = 1, esRight = 2};
+
+//forward declarations (for stuff used internally) ...
+struct TEdge;
+struct IntersectNode;
+struct LocalMinimum;
+struct OutPt;
+struct OutRec;
+struct Join;
+
+typedef std::vector < OutRec* > PolyOutList;
+typedef std::vector < TEdge* > EdgeList;
+typedef std::vector < Join* > JoinList;
+typedef std::vector < IntersectNode* > IntersectList;
+
+//------------------------------------------------------------------------------
+
+//ClipperBase is the ancestor to the Clipper class. It should not be
+//instantiated directly. This class simply abstracts the conversion of sets of
+//polygon coordinates into edge objects that are stored in a LocalMinima list.
+class ClipperBase
+{
+public:
+  ClipperBase();
+  virtual ~ClipperBase();
+  virtual bool AddPath(const Path &pg, PolyType PolyTyp, bool Closed);
+  bool AddPaths(const Paths &ppg, PolyType PolyTyp, bool Closed);
+  virtual void Clear();
+  IntRect GetBounds();
+  bool PreserveCollinear() {return m_PreserveCollinear;};
+  void PreserveCollinear(bool value) {m_PreserveCollinear = value;};
+protected:
+  void DisposeLocalMinimaList();
+  TEdge* AddBoundsToLML(TEdge *e, bool IsClosed);
+  virtual void Reset();
+  TEdge* ProcessBound(TEdge* E, bool IsClockwise);
+  void InsertScanbeam(const cInt Y);
+  bool PopScanbeam(cInt &Y);
+  bool LocalMinimaPending();
+  bool PopLocalMinima(cInt Y, const LocalMinimum *&locMin);
+  OutRec* CreateOutRec();
+  void DisposeAllOutRecs();
+  void DisposeOutRec(PolyOutList::size_type index);
+  void SwapPositionsInAEL(TEdge *edge1, TEdge *edge2);
+  void DeleteFromAEL(TEdge *e);
+  void UpdateEdgeIntoAEL(TEdge *&e);
+
+  typedef std::vector<LocalMinimum> MinimaList;
+  MinimaList::iterator m_CurrentLM;
+  MinimaList           m_MinimaList;
+
+  bool              m_UseFullRange;
+  EdgeList          m_edges;
+  bool              m_PreserveCollinear;
+  bool              m_HasOpenPaths;
+  PolyOutList       m_PolyOuts;
+  TEdge           *m_ActiveEdges;
+
+  typedef std::priority_queue<cInt> ScanbeamList;
+  ScanbeamList     m_Scanbeam;
+};
+//------------------------------------------------------------------------------
+
+class Clipper : public virtual ClipperBase
+{
+public:
+  Clipper(int initOptions = 0);
+  bool Execute(ClipType clipType,
+      Paths &solution,
+      PolyFillType fillType = pftEvenOdd);
+  bool Execute(ClipType clipType,
+      Paths &solution,
+      PolyFillType subjFillType,
+      PolyFillType clipFillType);
+  bool Execute(ClipType clipType,
+      PolyTree &polytree,
+      PolyFillType fillType = pftEvenOdd);
+  bool Execute(ClipType clipType,
+      PolyTree &polytree,
+      PolyFillType subjFillType,
+      PolyFillType clipFillType);
+  bool ReverseSolution() { return m_ReverseOutput; };
+  void ReverseSolution(bool value) {m_ReverseOutput = value;};
+  bool StrictlySimple() {return m_StrictSimple;};
+  void StrictlySimple(bool value) {m_StrictSimple = value;};
+  //set the callback function for z value filling on intersections (otherwise Z is 0)
+#ifdef use_xyz
+  void ZFillFunction(ZFillCallback zFillFunc);
+#endif
+protected:
+  virtual bool ExecuteInternal();
+private:
+  JoinList         m_Joins;
+  JoinList         m_GhostJoins;
+  IntersectList    m_IntersectList;
+  ClipType         m_ClipType;
+  typedef std::list<cInt> MaximaList;
+  MaximaList       m_Maxima;
+  TEdge           *m_SortedEdges;
+  bool             m_ExecuteLocked;
+  PolyFillType     m_ClipFillType;
+  PolyFillType     m_SubjFillType;
+  bool             m_ReverseOutput;
+  bool             m_UsingPolyTree;
+  bool             m_StrictSimple;
+#ifdef use_xyz
+  ZFillCallback   m_ZFill; //custom callback
+#endif
+  void SetWindingCount(TEdge& edge);
+  bool IsEvenOddFillType(const TEdge& edge) const;
+  bool IsEvenOddAltFillType(const TEdge& edge) const;
+  void InsertLocalMinimaIntoAEL(const cInt botY);
+  void InsertEdgeIntoAEL(TEdge *edge, TEdge* startEdge);
+  void AddEdgeToSEL(TEdge *edge);
+  bool PopEdgeFromSEL(TEdge *&edge);
+  void CopyAELToSEL();
+  void DeleteFromSEL(TEdge *e);
+  void SwapPositionsInSEL(TEdge *edge1, TEdge *edge2);
+  bool IsContributing(const TEdge& edge) const;
+  bool IsTopHorz(const cInt XPos);
+  void DoMaxima(TEdge *e);
+  void ProcessHorizontals();
+  void ProcessHorizontal(TEdge *horzEdge);
+  void AddLocalMaxPoly(TEdge *e1, TEdge *e2, const IntPoint &pt);
+  OutPt* AddLocalMinPoly(TEdge *e1, TEdge *e2, const IntPoint &pt);
+  OutRec* GetOutRec(int idx);
+  void AppendPolygon(TEdge *e1, TEdge *e2);
+  void IntersectEdges(TEdge *e1, TEdge *e2, IntPoint &pt);
+  OutPt* AddOutPt(TEdge *e, const IntPoint &pt);
+  OutPt* GetLastOutPt(TEdge *e);
+  bool ProcessIntersections(const cInt topY);
+  void BuildIntersectList(const cInt topY);
+  void ProcessIntersectList();
+  void ProcessEdgesAtTopOfScanbeam(const cInt topY);
+  void BuildResult(Paths& polys);
+  void BuildResult2(PolyTree& polytree);
+  void SetHoleState(TEdge *e, OutRec *outrec);
+  void DisposeIntersectNodes();
+  bool FixupIntersectionOrder();
+  void FixupOutPolygon(OutRec &outrec);
+  void FixupOutPolyline(OutRec &outrec);
+  bool IsHole(TEdge *e);
+  bool FindOwnerFromSplitRecs(OutRec &outRec, OutRec *&currOrfl);
+  void FixHoleLinkage(OutRec &outrec);
+  void AddJoin(OutPt *op1, OutPt *op2, const IntPoint offPt);
+  void ClearJoins();
+  void ClearGhostJoins();
+  void AddGhostJoin(OutPt *op, const IntPoint offPt);
+  bool JoinPoints(Join *j, OutRec* outRec1, OutRec* outRec2);
+  void JoinCommonEdges();
+  void DoSimplePolygons();
+  void FixupFirstLefts1(OutRec* OldOutRec, OutRec* NewOutRec);
+  void FixupFirstLefts2(OutRec* InnerOutRec, OutRec* OuterOutRec);
+  void FixupFirstLefts3(OutRec* OldOutRec, OutRec* NewOutRec);
+#ifdef use_xyz
+  void SetZ(IntPoint& pt, TEdge& e1, TEdge& e2);
+#endif
+};
+//------------------------------------------------------------------------------
+
+class ClipperOffset
+{
+public:
+  ClipperOffset(double miterLimit = 2.0, double roundPrecision = 0.25);
+  ~ClipperOffset();
+  void AddPath(const Path& path, JoinType joinType, EndType endType);
+  void AddPaths(const Paths& paths, JoinType joinType, EndType endType);
+  void Execute(Paths& solution, double delta);
+  void Execute(PolyTree& solution, double delta);
+  void Clear();
+  double MiterLimit;
+  double ArcTolerance;
+private:
+  Paths m_destPolys;
+  Path m_srcPoly;
+  Path m_destPoly;
+  std::vector<DoublePoint> m_normals;
+  double m_delta, m_sinA, m_sin, m_cos;
+  double m_miterLim, m_StepsPerRad;
+  IntPoint m_lowest;
+  PolyNode m_polyNodes;
+
+  void FixOrientations();
+  void DoOffset(double delta);
+  void OffsetPoint(int j, int& k, JoinType jointype);
+  void DoSquare(int j, int k);
+  void DoMiter(int j, int k, double r);
+  void DoRound(int j, int k);
+};
+//------------------------------------------------------------------------------
+
+class clipperException : public std::exception
+{
+  public:
+    clipperException(const char* description): m_descr(description) {}
+    virtual ~clipperException() throw() {}
+    virtual const char* what() const throw() {return m_descr.c_str();}
+  private:
+    std::string m_descr;
+};
+//------------------------------------------------------------------------------
+
+} //ClipperLib namespace
+
+#endif //clipper_hpp
--- a/third_party/concurrentqueue/concurrentqueue.h
+++ b/third_party/concurrentqueue/concurrentqueue.h
+// Provides a C++11 implementation of a multi-producer, multi-consumer lock-free queue.
+// An overview, including benchmark results, is provided here:
+//     http://moodycamel.com/blog/2014/a-fast-general-purpose-lock-free-queue-for-c++
+// The full design is also described in excruciating detail at:
+//    http://moodycamel.com/blog/2014/detailed-design-of-a-lock-free-queue
+
+// Simplified BSD license:
+// Copyright (c) 2013-2020, Cameron Desrochers.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice, this list of
+// conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice, this list of
+// conditions and the following disclaimer in the documentation and/or other materials
+// provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+// OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
+// TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+// EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Also dual-licensed under the Boost Software License (see LICENSE.md)
+
+#pragma once
+
+#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
+// Disable -Wconversion warnings (spuriously triggered when Traits::size_t and
+// Traits::index_t are set to < 32 bits, causing integer promotion, causing warnings
+// upon assigning any computed values)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wconversion"
+
+#ifdef MCDBGQ_USE_RELACY
+#pragma GCC diagnostic ignored "-Wint-to-pointer-cast"
+#endif
+#endif
+
+#if defined(_MSC_VER) && (!defined(_HAS_CXX17) || !_HAS_CXX17)
+// VS2019 with /W4 warns about constant conditional expressions but unless /std=c++17 or higher
+// does not support `if constexpr`, so we have no choice but to simply disable the warning
+#pragma warning(push)
+#pragma warning(disable: 4127)  // conditional expression is constant
+#endif
+
+#if defined(__APPLE__)
+#include "TargetConditionals.h"
+#endif
+
+#ifdef MCDBGQ_USE_RELACY
+#include "relacy/relacy_std.hpp"
+#include "relacy_shims.h"
+// We only use malloc/free anyway, and the delete macro messes up `= delete` method declarations.
+// We'll override the default trait malloc ourselves without a macro.
+#undef new
+#undef delete
+#undef malloc
+#undef free
+#else
+#include <atomic>		// Requires C++11. Sorry VS2010.
+#include <cassert>
+#endif
+#include <cstddef>              // for max_align_t
+#include <cstdint>
+#include <cstdlib>
+#include <type_traits>
+#include <algorithm>
+#include <utility>
+#include <limits>
+#include <climits>		// for CHAR_BIT
+#include <array>
+#include <thread>		// partly for __WINPTHREADS_VERSION if on MinGW-w64 w/ POSIX threading
+#include <mutex>        // used for thread exit synchronization
+
+// Platform-specific definitions of a numeric thread ID type and an invalid value
+namespace moodycamel { namespace details {
+	template<typename thread_id_t> struct thread_id_converter {
+		typedef thread_id_t thread_id_numeric_size_t;
+		typedef thread_id_t thread_id_hash_t;
+		static thread_id_hash_t prehash(thread_id_t const& x) { return x; }
+	};
+} }
+#if defined(MCDBGQ_USE_RELACY)
+namespace moodycamel { namespace details {
+	typedef std::uint32_t thread_id_t;
+	static const thread_id_t invalid_thread_id  = 0xFFFFFFFFU;
+	static const thread_id_t invalid_thread_id2 = 0xFFFFFFFEU;
+	static inline thread_id_t thread_id() { return rl::thread_index(); }
+} }
+#elif defined(_WIN32) || defined(__WINDOWS__) || defined(__WIN32__)
+// No sense pulling in windows.h in a header, we'll manually declare the function
+// we use and rely on backwards-compatibility for this not to break
+extern "C" __declspec(dllimport) unsigned long __stdcall GetCurrentThreadId(void);
+namespace moodycamel { namespace details {
+	static_assert(sizeof(unsigned long) == sizeof(std::uint32_t), "Expected size of unsigned long to be 32 bits on Windows");
+	typedef std::uint32_t thread_id_t;
+	static const thread_id_t invalid_thread_id  = 0;			// See http://blogs.msdn.com/b/oldnewthing/archive/2004/02/23/78395.aspx
+	static const thread_id_t invalid_thread_id2 = 0xFFFFFFFFU;	// Not technically guaranteed to be invalid, but is never used in practice. Note that all Win32 thread IDs are presently multiples of 4.
+	static inline thread_id_t thread_id() { return static_cast<thread_id_t>(::GetCurrentThreadId()); }
+} }
+#elif defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || (defined(__APPLE__) && TARGET_OS_IPHONE) || defined(MOODYCAMEL_NO_THREAD_LOCAL)
+namespace moodycamel { namespace details {
+	static_assert(sizeof(std::thread::id) == 4 || sizeof(std::thread::id) == 8, "std::thread::id is expected to be either 4 or 8 bytes");
+
+	typedef std::thread::id thread_id_t;
+	static const thread_id_t invalid_thread_id;         // Default ctor creates invalid ID
+
+	// Note we don't define a invalid_thread_id2 since std::thread::id doesn't have one; it's
+	// only used if MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is defined anyway, which it won't
+	// be.
+	static inline thread_id_t thread_id() { return std::this_thread::get_id(); }
+
+	template<std::size_t> struct thread_id_size { };
+	template<> struct thread_id_size<4> { typedef std::uint32_t numeric_t; };
+	template<> struct thread_id_size<8> { typedef std::uint64_t numeric_t; };
+
+	template<> struct thread_id_converter<thread_id_t> {
+		typedef thread_id_size<sizeof(thread_id_t)>::numeric_t thread_id_numeric_size_t;
+#ifndef __APPLE__
+		typedef std::size_t thread_id_hash_t;
+#else
+		typedef thread_id_numeric_size_t thread_id_hash_t;
+#endif
+
+		static thread_id_hash_t prehash(thread_id_t const& x)
+		{
+#ifndef __APPLE__
+			return std::hash<std::thread::id>()(x);
+#else
+			return *reinterpret_cast<thread_id_hash_t const*>(&x);
+#endif
+		}
+	};
+} }
+#else
+// Use a nice trick from this answer: http://stackoverflow.com/a/8438730/21475
+// In order to get a numeric thread ID in a platform-independent way, we use a thread-local
+// static variable's address as a thread identifier :-)
+#if defined(__GNUC__) || defined(__INTEL_COMPILER)
+#define MOODYCAMEL_THREADLOCAL __thread
+#elif defined(_MSC_VER)
+#define MOODYCAMEL_THREADLOCAL __declspec(thread)
+#else
+// Assume C++11 compliant compiler
+#define MOODYCAMEL_THREADLOCAL thread_local
+#endif
+namespace moodycamel { namespace details {
+	typedef std::uintptr_t thread_id_t;
+	static const thread_id_t invalid_thread_id  = 0;		// Address can't be nullptr
+	static const thread_id_t invalid_thread_id2 = 1;		// Member accesses off a null pointer are also generally invalid. Plus it's not aligned.
+	inline thread_id_t thread_id() { static MOODYCAMEL_THREADLOCAL int x; return reinterpret_cast<thread_id_t>(&x); }
+} }
+#endif
+
+// Constexpr if
+#ifndef MOODYCAMEL_CONSTEXPR_IF
+#if (defined(_MSC_VER) && defined(_HAS_CXX17) && _HAS_CXX17) || __cplusplus > 201402L
+#define MOODYCAMEL_CONSTEXPR_IF if constexpr
+#define MOODYCAMEL_MAYBE_UNUSED [[maybe_unused]]
+#else
+#define MOODYCAMEL_CONSTEXPR_IF if
+#define MOODYCAMEL_MAYBE_UNUSED
+#endif
+#endif
+
+// Exceptions
+#ifndef MOODYCAMEL_EXCEPTIONS_ENABLED
+#if (defined(_MSC_VER) && defined(_CPPUNWIND)) || (defined(__GNUC__) && defined(__EXCEPTIONS)) || (!defined(_MSC_VER) && !defined(__GNUC__))
+#define MOODYCAMEL_EXCEPTIONS_ENABLED
+#endif
+#endif
+#ifdef MOODYCAMEL_EXCEPTIONS_ENABLED
+#define MOODYCAMEL_TRY try
+#define MOODYCAMEL_CATCH(...) catch(__VA_ARGS__)
+#define MOODYCAMEL_RETHROW throw
+#define MOODYCAMEL_THROW(expr) throw (expr)
+#else
+#define MOODYCAMEL_TRY MOODYCAMEL_CONSTEXPR_IF (true)
+#define MOODYCAMEL_CATCH(...) else MOODYCAMEL_CONSTEXPR_IF (false)
+#define MOODYCAMEL_RETHROW
+#define MOODYCAMEL_THROW(expr)
+#endif
+
+#ifndef MOODYCAMEL_NOEXCEPT
+#if !defined(MOODYCAMEL_EXCEPTIONS_ENABLED)
+#define MOODYCAMEL_NOEXCEPT
+#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) true
+#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) true
+#elif defined(_MSC_VER) && defined(_NOEXCEPT) && _MSC_VER < 1800
+// VS2012's std::is_nothrow_[move_]constructible is broken and returns true when it shouldn't :-(
+// We have to assume *all* non-trivial constructors may throw on VS2012!
+#define MOODYCAMEL_NOEXCEPT _NOEXCEPT
+#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) (std::is_rvalue_reference<valueType>::value && std::is_move_constructible<type>::value ? std::is_trivially_move_constructible<type>::value : std::is_trivially_copy_constructible<type>::value)
+#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) ((std::is_rvalue_reference<valueType>::value && std::is_move_assignable<type>::value ? std::is_trivially_move_assignable<type>::value || std::is_nothrow_move_assignable<type>::value : std::is_trivially_copy_assignable<type>::value || std::is_nothrow_copy_assignable<type>::value) && MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr))
+#elif defined(_MSC_VER) && defined(_NOEXCEPT) && _MSC_VER < 1900
+#define MOODYCAMEL_NOEXCEPT _NOEXCEPT
+#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) (std::is_rvalue_reference<valueType>::value && std::is_move_constructible<type>::value ? std::is_trivially_move_constructible<type>::value || std::is_nothrow_move_constructible<type>::value : std::is_trivially_copy_constructible<type>::value || std::is_nothrow_copy_constructible<type>::value)
+#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) ((std::is_rvalue_reference<valueType>::value && std::is_move_assignable<type>::value ? std::is_trivially_move_assignable<type>::value || std::is_nothrow_move_assignable<type>::value : std::is_trivially_copy_assignable<type>::value || std::is_nothrow_copy_assignable<type>::value) && MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr))
+#else
+#define MOODYCAMEL_NOEXCEPT noexcept
+#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) noexcept(expr)
+#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) noexcept(expr)
+#endif
+#endif
+
+#ifndef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
+#ifdef MCDBGQ_USE_RELACY
+#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
+#else
+// VS2013 doesn't support `thread_local`, and MinGW-w64 w/ POSIX threading has a crippling bug: http://sourceforge.net/p/mingw-w64/bugs/445
+// g++ <=4.7 doesn't support thread_local either.
+// Finally, iOS/ARM doesn't have support for it either, and g++/ARM allows it to compile but it's unconfirmed to actually work
+#if (!defined(_MSC_VER) || _MSC_VER >= 1900) && (!defined(__MINGW32__) && !defined(__MINGW64__) || !defined(__WINPTHREADS_VERSION)) && (!defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) && (!defined(__APPLE__) || !TARGET_OS_IPHONE) && !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__)
+// Assume `thread_local` is fully supported in all other C++11 compilers/platforms
+#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED    // tentatively enabled for now; years ago several users report having problems with it on
+#endif
+#endif
+#endif
+
+// VS2012 doesn't support deleted functions.
+// In this case, we declare the function normally but don't define it. A link error will be generated if the function is called.
+#ifndef MOODYCAMEL_DELETE_FUNCTION
+#if defined(_MSC_VER) && _MSC_VER < 1800
+#define MOODYCAMEL_DELETE_FUNCTION
+#else
+#define MOODYCAMEL_DELETE_FUNCTION = delete
+#endif
+#endif
+
+namespace moodycamel { namespace details {
+#ifndef MOODYCAMEL_ALIGNAS
+// VS2013 doesn't support alignas or alignof, and align() requires a constant literal
+#if defined(_MSC_VER) && _MSC_VER <= 1800
+#define MOODYCAMEL_ALIGNAS(alignment) __declspec(align(alignment))
+#define MOODYCAMEL_ALIGNOF(obj) __alignof(obj)
+#define MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj) typename details::Vs2013Aligned<std::alignment_of<obj>::value, T>::type
+	template<int Align, typename T> struct Vs2013Aligned { };  // default, unsupported alignment
+	template<typename T> struct Vs2013Aligned<1, T> { typedef __declspec(align(1)) T type; };
+	template<typename T> struct Vs2013Aligned<2, T> { typedef __declspec(align(2)) T type; };
+	template<typename T> struct Vs2013Aligned<4, T> { typedef __declspec(align(4)) T type; };
+	template<typename T> struct Vs2013Aligned<8, T> { typedef __declspec(align(8)) T type; };
+	template<typename T> struct Vs2013Aligned<16, T> { typedef __declspec(align(16)) T type; };
+	template<typename T> struct Vs2013Aligned<32, T> { typedef __declspec(align(32)) T type; };
+	template<typename T> struct Vs2013Aligned<64, T> { typedef __declspec(align(64)) T type; };
+	template<typename T> struct Vs2013Aligned<128, T> { typedef __declspec(align(128)) T type; };
+	template<typename T> struct Vs2013Aligned<256, T> { typedef __declspec(align(256)) T type; };
+#else
+	template<typename T> struct identity { typedef T type; };
+#define MOODYCAMEL_ALIGNAS(alignment) alignas(alignment)
+#define MOODYCAMEL_ALIGNOF(obj) alignof(obj)
+#define MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj) alignas(alignof(obj)) typename details::identity<T>::type
+#endif
+#endif
+} }
+
+
+// TSAN can false report races in lock-free code.  To enable TSAN to be used from projects that use this one,
+// we can apply per-function compile-time suppression.
+// See https://clang.llvm.org/docs/ThreadSanitizer.html#has-feature-thread-sanitizer
+#define MOODYCAMEL_NO_TSAN
+#if defined(__has_feature)
+ #if __has_feature(thread_sanitizer)
+  #undef MOODYCAMEL_NO_TSAN
+  #define MOODYCAMEL_NO_TSAN __attribute__((no_sanitize("thread")))
+ #endif // TSAN
+#endif // TSAN
+
+// Compiler-specific likely/unlikely hints
+namespace moodycamel { namespace details {
+#if defined(__GNUC__)
+	static inline bool (likely)(bool x) { return __builtin_expect((x), true); }
+	static inline bool (unlikely)(bool x) { return __builtin_expect((x), false); }
+#else
+	static inline bool (likely)(bool x) { return x; }
+	static inline bool (unlikely)(bool x) { return x; }
+#endif
+} }
+
+#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
+#include "internal/concurrentqueue_internal_debug.h"
+#endif
+
+namespace moodycamel {
+namespace details {
+	template<typename T>
+	struct const_numeric_max {
+		static_assert(std::is_integral<T>::value, "const_numeric_max can only be used with integers");
+		static const T value = std::numeric_limits<T>::is_signed
+			? (static_cast<T>(1) << (sizeof(T) * CHAR_BIT - 1)) - static_cast<T>(1)
+			: static_cast<T>(-1);
+	};
+
+#if defined(__GLIBCXX__)
+	typedef ::max_align_t std_max_align_t;      // libstdc++ forgot to add it to std:: for a while
+#else
+	typedef std::max_align_t std_max_align_t;   // Others (e.g. MSVC) insist it can *only* be accessed via std::
+#endif
+
+	// Some platforms have incorrectly set max_align_t to a type with <8 bytes alignment even while supporting
+	// 8-byte aligned scalar values (*cough* 32-bit iOS). Work around this with our own union. See issue #64.
+	typedef union {
+		std_max_align_t x;
+		long long y;
+		void* z;
+	} max_align_t;
+}
+
+// Default traits for the ConcurrentQueue. To change some of the
+// traits without re-implementing all of them, inherit from this
+// struct and shadow the declarations you wish to be different;
+// since the traits are used as a template type parameter, the
+// shadowed declarations will be used where defined, and the defaults
+// otherwise.
+struct ConcurrentQueueDefaultTraits
+{
+	// General-purpose size type. std::size_t is strongly recommended.
+	typedef std::size_t size_t;
+
+	// The type used for the enqueue and dequeue indices. Must be at least as
+	// large as size_t. Should be significantly larger than the number of elements
+	// you expect to hold at once, especially if you have a high turnover rate;
+	// for example, on 32-bit x86, if you expect to have over a hundred million
+	// elements or pump several million elements through your queue in a very
+	// short space of time, using a 32-bit type *may* trigger a race condition.
+	// A 64-bit int type is recommended in that case, and in practice will
+	// prevent a race condition no matter the usage of the queue. Note that
+	// whether the queue is lock-free with a 64-int type depends on the whether
+	// std::atomic<std::uint64_t> is lock-free, which is platform-specific.
+	typedef std::size_t index_t;
+
+	// Internally, all elements are enqueued and dequeued from multi-element
+	// blocks; this is the smallest controllable unit. If you expect few elements
+	// but many producers, a smaller block size should be favoured. For few producers
+	// and/or many elements, a larger block size is preferred. A sane default
+	// is provided. Must be a power of 2.
+	static const size_t BLOCK_SIZE = 32;
+
+	// For explicit producers (i.e. when using a producer token), the block is
+	// checked for being empty by iterating through a list of flags, one per element.
+	// For large block sizes, this is too inefficient, and switching to an atomic
+	// counter-based approach is faster. The switch is made for block sizes strictly
+	// larger than this threshold.
+	static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = 32;
+
+	// How many full blocks can be expected for a single explicit producer? This should
+	// reflect that number's maximum for optimal performance. Must be a power of 2.
+	static const size_t EXPLICIT_INITIAL_INDEX_SIZE = 32;
+
+	// How many full blocks can be expected for a single implicit producer? This should
+	// reflect that number's maximum for optimal performance. Must be a power of 2.
+	static const size_t IMPLICIT_INITIAL_INDEX_SIZE = 32;
+
+	// The initial size of the hash table mapping thread IDs to implicit producers.
+	// Note that the hash is resized every time it becomes half full.
+	// Must be a power of two, and either 0 or at least 1. If 0, implicit production
+	// (using the enqueue methods without an explicit producer token) is disabled.
+	static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = 32;
+
+	// Controls the number of items that an explicit consumer (i.e. one with a token)
+	// must consume before it causes all consumers to rotate and move on to the next
+	// internal queue.
+	static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = 256;
+
+	// The maximum number of elements (inclusive) that can be enqueued to a sub-queue.
+	// Enqueue operations that would cause this limit to be surpassed will fail. Note
+	// that this limit is enforced at the block level (for performance reasons), i.e.
+	// it's rounded up to the nearest block size.
+	static const size_t MAX_SUBQUEUE_SIZE = details::const_numeric_max<size_t>::value;
+
+	// The number of times to spin before sleeping when waiting on a semaphore.
+	// Recommended values are on the order of 1000-10000 unless the number of
+	// consumer threads exceeds the number of idle cores (in which case try 0-100).
+	// Only affects instances of the BlockingConcurrentQueue.
+	static const int MAX_SEMA_SPINS = 10000;
+
+	// Whether to recycle dynamically-allocated blocks into an internal free list or
+	// not. If false, only pre-allocated blocks (controlled by the constructor
+	// arguments) will be recycled, and all others will be `free`d back to the heap.
+	// Note that blocks consumed by explicit producers are only freed on destruction
+	// of the queue (not following destruction of the token) regardless of this trait.
+	static const bool RECYCLE_ALLOCATED_BLOCKS = false;
+
+
+#ifndef MCDBGQ_USE_RELACY
+	// Memory allocation can be customized if needed.
+	// malloc should return nullptr on failure, and handle alignment like std::malloc.
+#if defined(malloc) || defined(free)
+	// Gah, this is 2015, stop defining macros that break standard code already!
+	// Work around malloc/free being special macros:
+	static inline void* WORKAROUND_malloc(size_t size) { return malloc(size); }
+	static inline void WORKAROUND_free(void* ptr) { return free(ptr); }
+	static inline void* (malloc)(size_t size) { return WORKAROUND_malloc(size); }
+	static inline void (free)(void* ptr) { return WORKAROUND_free(ptr); }
+#else
+	static inline void* malloc(size_t size) { return std::malloc(size); }
+	static inline void free(void* ptr) { return std::free(ptr); }
+#endif
+#else
+	// Debug versions when running under the Relacy race detector (ignore
+	// these in user code)
+	static inline void* malloc(size_t size) { return rl::rl_malloc(size, $); }
+	static inline void free(void* ptr) { return rl::rl_free(ptr, $); }
+#endif
+};
+
+
+// When producing or consuming many elements, the most efficient way is to:
+//    1) Use one of the bulk-operation methods of the queue with a token
+//    2) Failing that, use the bulk-operation methods without a token
+//    3) Failing that, create a token and use that with the single-item methods
+//    4) Failing that, use the single-parameter methods of the queue
+// Having said that, don't create tokens willy-nilly -- ideally there should be
+// a maximum of one token per thread (of each kind).
+struct ProducerToken;
+struct ConsumerToken;
+
+template<typename T, typename Traits> class ConcurrentQueue;
+template<typename T, typename Traits> class BlockingConcurrentQueue;
+class ConcurrentQueueTests;
+
+
+namespace details
+{
+	struct ConcurrentQueueProducerTypelessBase
+	{
+		ConcurrentQueueProducerTypelessBase* next;
+		std::atomic<bool> inactive;
+		ProducerToken* token;
+
+		ConcurrentQueueProducerTypelessBase()
+			: next(nullptr), inactive(false), token(nullptr)
+		{
+		}
+	};
+
+	template<bool use32> struct _hash_32_or_64 {
+		static inline std::uint32_t hash(std::uint32_t h)
+		{
+			// MurmurHash3 finalizer -- see https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
+			// Since the thread ID is already unique, all we really want to do is propagate that
+			// uniqueness evenly across all the bits, so that we can use a subset of the bits while
+			// reducing collisions significantly
+			h ^= h >> 16;
+			h *= 0x85ebca6b;
+			h ^= h >> 13;
+			h *= 0xc2b2ae35;
+			return h ^ (h >> 16);
+		}
+	};
+	template<> struct _hash_32_or_64<1> {
+		static inline std::uint64_t hash(std::uint64_t h)
+		{
+			h ^= h >> 33;
+			h *= 0xff51afd7ed558ccd;
+			h ^= h >> 33;
+			h *= 0xc4ceb9fe1a85ec53;
+			return h ^ (h >> 33);
+		}
+	};
+	template<std::size_t size> struct hash_32_or_64 : public _hash_32_or_64<(size > 4)> {  };
+
+	static inline size_t hash_thread_id(thread_id_t id)
+	{
+		static_assert(sizeof(thread_id_t) <= 8, "Expected a platform where thread IDs are at most 64-bit values");
+		return static_cast<size_t>(hash_32_or_64<sizeof(thread_id_converter<thread_id_t>::thread_id_hash_t)>::hash(
+			thread_id_converter<thread_id_t>::prehash(id)));
+	}
+
+	template<typename T>
+	static inline bool circular_less_than(T a, T b)
+	{
+		static_assert(std::is_integral<T>::value && !std::numeric_limits<T>::is_signed, "circular_less_than is intended to be used only with unsigned integer types");
+		return static_cast<T>(a - b) > static_cast<T>(static_cast<T>(1) << (static_cast<T>(sizeof(T) * CHAR_BIT - 1)));
+		// Note: extra parens around rhs of operator<< is MSVC bug: https://developercommunity2.visualstudio.com/t/C4554-triggers-when-both-lhs-and-rhs-is/10034931
+		//       silencing the bug requires #pragma warning(disable: 4554) around the calling code and has no effect when done here.
+	}
+
+	template<typename U>
+	static inline char* align_for(char* ptr)
+	{
+		const std::size_t alignment = std::alignment_of<U>::value;
+		return ptr + (alignment - (reinterpret_cast<std::uintptr_t>(ptr) % alignment)) % alignment;
+	}
+
+	template<typename T>
+	static inline T ceil_to_pow_2(T x)
+	{
+		static_assert(std::is_integral<T>::value && !std::numeric_limits<T>::is_signed, "ceil_to_pow_2 is intended to be used only with unsigned integer types");
+
+		// Adapted from http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
+		--x;
+		x |= x >> 1;
+		x |= x >> 2;
+		x |= x >> 4;
+		for (std::size_t i = 1; i < sizeof(T); i <<= 1) {
+			x |= x >> (i << 3);
+		}
+		++x;
+		return x;
+	}
+
+	template<typename T>
+	static inline void swap_relaxed(std::atomic<T>& left, std::atomic<T>& right)
+	{
+		T temp = std::move(left.load(std::memory_order_relaxed));
+		left.store(std::move(right.load(std::memory_order_relaxed)), std::memory_order_relaxed);
+		right.store(std::move(temp), std::memory_order_relaxed);
+	}
+
+	template<typename T>
+	static inline T const& nomove(T const& x)
+	{
+		return x;
+	}
+
+	template<bool Enable>
+	struct nomove_if
+	{
+		template<typename T>
+		static inline T const& eval(T const& x)
+		{
+			return x;
+		}
+	};
+
+	template<>
+	struct nomove_if<false>
+	{
+		template<typename U>
+		static inline auto eval(U&& x)
+			-> decltype(std::forward<U>(x))
+		{
+			return std::forward<U>(x);
+		}
+	};
+
+	template<typename It>
+	static inline auto deref_noexcept(It& it) MOODYCAMEL_NOEXCEPT -> decltype(*it)
+	{
+		return *it;
+	}
+
+#if defined(__clang__) || !defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)
+	template<typename T> struct is_trivially_destructible : std::is_trivially_destructible<T> { };
+#else
+	template<typename T> struct is_trivially_destructible : std::has_trivial_destructor<T> { };
+#endif
+
+#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
+#ifdef MCDBGQ_USE_RELACY
+	typedef RelacyThreadExitListener ThreadExitListener;
+	typedef RelacyThreadExitNotifier ThreadExitNotifier;
+#else
+	class ThreadExitNotifier;
+
+	struct ThreadExitListener
+	{
+		typedef void (*callback_t)(void*);
+		callback_t callback;
+		void* userData;
+
+		ThreadExitListener* next;		// reserved for use by the ThreadExitNotifier
+		ThreadExitNotifier* chain;		// reserved for use by the ThreadExitNotifier
+	};
+
+	class ThreadExitNotifier
+	{
+	public:
+		static void subscribe(ThreadExitListener* listener)
+		{
+			auto& tlsInst = instance();
+			std::lock_guard<std::mutex> guard(mutex());
+			listener->next = tlsInst.tail;
+			listener->chain = &tlsInst;
+			tlsInst.tail = listener;
+		}
+
+		static void unsubscribe(ThreadExitListener* listener)
+		{
+			std::lock_guard<std::mutex> guard(mutex());
+			if (!listener->chain) {
+				return;  // race with ~ThreadExitNotifier
+			}
+			auto& tlsInst = *listener->chain;
+			listener->chain = nullptr;
+			ThreadExitListener** prev = &tlsInst.tail;
+			for (auto ptr = tlsInst.tail; ptr != nullptr; ptr = ptr->next) {
+				if (ptr == listener) {
+					*prev = ptr->next;
+					break;
+				}
+				prev = &ptr->next;
+			}
+		}
+
+	private:
+		ThreadExitNotifier() : tail(nullptr) { }
+		ThreadExitNotifier(ThreadExitNotifier const&) MOODYCAMEL_DELETE_FUNCTION;
+		ThreadExitNotifier& operator=(ThreadExitNotifier const&) MOODYCAMEL_DELETE_FUNCTION;
+
+		~ThreadExitNotifier()
+		{
+			// This thread is about to exit, let everyone know!
+			assert(this == &instance() && "If this assert fails, you likely have a buggy compiler! Change the preprocessor conditions such that MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is no longer defined.");
+			std::lock_guard<std::mutex> guard(mutex());
+			for (auto ptr = tail; ptr != nullptr; ptr = ptr->next) {
+				ptr->chain = nullptr;
+				ptr->callback(ptr->userData);
+			}
+		}
+
+		// Thread-local
+		static inline ThreadExitNotifier& instance()
+		{
+			static thread_local ThreadExitNotifier notifier;
+			return notifier;
+		}
+
+		static inline std::mutex& mutex()
+		{
+			// Must be static because the ThreadExitNotifier could be destroyed while unsubscribe is called
+			static std::mutex mutex;
+			return mutex;
+		}
+
+	private:
+		ThreadExitListener* tail;
+	};
+#endif
+#endif
+
+	template<typename T> struct static_is_lock_free_num { enum { value = 0 }; };
+	template<> struct static_is_lock_free_num<signed char> { enum { value = ATOMIC_CHAR_LOCK_FREE }; };
+	template<> struct static_is_lock_free_num<short> { enum { value = ATOMIC_SHORT_LOCK_FREE }; };
+	template<> struct static_is_lock_free_num<int> { enum { value = ATOMIC_INT_LOCK_FREE }; };
+	template<> struct static_is_lock_free_num<long> { enum { value = ATOMIC_LONG_LOCK_FREE }; };
+	template<> struct static_is_lock_free_num<long long> { enum { value = ATOMIC_LLONG_LOCK_FREE }; };
+	template<typename T> struct static_is_lock_free : static_is_lock_free_num<typename std::make_signed<T>::type> {  };
+	template<> struct static_is_lock_free<bool> { enum { value = ATOMIC_BOOL_LOCK_FREE }; };
+	template<typename U> struct static_is_lock_free<U*> { enum { value = ATOMIC_POINTER_LOCK_FREE }; };
+}
+
+
+struct ProducerToken
+{
+	template<typename T, typename Traits>
+	explicit ProducerToken(ConcurrentQueue<T, Traits>& queue);
+
+	template<typename T, typename Traits>
+	explicit ProducerToken(BlockingConcurrentQueue<T, Traits>& queue);
+
+	ProducerToken(ProducerToken&& other) MOODYCAMEL_NOEXCEPT
+		: producer(other.producer)
+	{
+		other.producer = nullptr;
+		if (producer != nullptr) {
+			producer->token = this;
+		}
+	}
+
+	inline ProducerToken& operator=(ProducerToken&& other) MOODYCAMEL_NOEXCEPT
+	{
+		swap(other);
+		return *this;
+	}
+
+	void swap(ProducerToken& other) MOODYCAMEL_NOEXCEPT
+	{
+		std::swap(producer, other.producer);
+		if (producer != nullptr) {
+			producer->token = this;
+		}
+		if (other.producer != nullptr) {
+			other.producer->token = &other;
+		}
+	}
+
+	// A token is always valid unless:
+	//     1) Memory allocation failed during construction
+	//     2) It was moved via the move constructor
+	//        (Note: assignment does a swap, leaving both potentially valid)
+	//     3) The associated queue was destroyed
+	// Note that if valid() returns true, that only indicates
+	// that the token is valid for use with a specific queue,
+	// but not which one; that's up to the user to track.
+	inline bool valid() const { return producer != nullptr; }
+
+	~ProducerToken()
+	{
+		if (producer != nullptr) {
+			producer->token = nullptr;
+			producer->inactive.store(true, std::memory_order_release);
+		}
+	}
+
+	// Disable copying and assignment
+	ProducerToken(ProducerToken const&) MOODYCAMEL_DELETE_FUNCTION;
+	ProducerToken& operator=(ProducerToken const&) MOODYCAMEL_DELETE_FUNCTION;
+
+private:
+	template<typename T, typename Traits> friend class ConcurrentQueue;
+	friend class ConcurrentQueueTests;
+
+protected:
+	details::ConcurrentQueueProducerTypelessBase* producer;
+};
+
+
+struct ConsumerToken
+{
+	template<typename T, typename Traits>
+	explicit ConsumerToken(ConcurrentQueue<T, Traits>& q);
+
+	template<typename T, typename Traits>
+	explicit ConsumerToken(BlockingConcurrentQueue<T, Traits>& q);
+
+	ConsumerToken(ConsumerToken&& other) MOODYCAMEL_NOEXCEPT
+		: initialOffset(other.initialOffset), lastKnownGlobalOffset(other.lastKnownGlobalOffset), itemsConsumedFromCurrent(other.itemsConsumedFromCurrent), currentProducer(other.currentProducer), desiredProducer(other.desiredProducer)
+	{
+	}
+
+	inline ConsumerToken& operator=(ConsumerToken&& other) MOODYCAMEL_NOEXCEPT
+	{
+		swap(other);
+		return *this;
+	}
+
+	void swap(ConsumerToken& other) MOODYCAMEL_NOEXCEPT
+	{
+		std::swap(initialOffset, other.initialOffset);
+		std::swap(lastKnownGlobalOffset, other.lastKnownGlobalOffset);
+		std::swap(itemsConsumedFromCurrent, other.itemsConsumedFromCurrent);
+		std::swap(currentProducer, other.currentProducer);
+		std::swap(desiredProducer, other.desiredProducer);
+	}
+
+	// Disable copying and assignment
+	ConsumerToken(ConsumerToken const&) MOODYCAMEL_DELETE_FUNCTION;
+	ConsumerToken& operator=(ConsumerToken const&) MOODYCAMEL_DELETE_FUNCTION;
+
+private:
+	template<typename T, typename Traits> friend class ConcurrentQueue;
+	friend class ConcurrentQueueTests;
+
+private: // but shared with ConcurrentQueue
+	std::uint32_t initialOffset;
+	std::uint32_t lastKnownGlobalOffset;
+	std::uint32_t itemsConsumedFromCurrent;
+	details::ConcurrentQueueProducerTypelessBase* currentProducer;
+	details::ConcurrentQueueProducerTypelessBase* desiredProducer;
+};
+
+// Need to forward-declare this swap because it's in a namespace.
+// See http://stackoverflow.com/questions/4492062/why-does-a-c-friend-class-need-a-forward-declaration-only-in-other-namespaces
+template<typename T, typename Traits>
+inline void swap(typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP& a, typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP& b) MOODYCAMEL_NOEXCEPT;
+
+
+template<typename T, typename Traits = ConcurrentQueueDefaultTraits>
+class ConcurrentQueue
+{
+public:
+	typedef ::moodycamel::ProducerToken producer_token_t;
+	typedef ::moodycamel::ConsumerToken consumer_token_t;
+
+	typedef typename Traits::index_t index_t;
+	typedef typename Traits::size_t size_t;
+
+	static const size_t BLOCK_SIZE = static_cast<size_t>(Traits::BLOCK_SIZE);
+	static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = static_cast<size_t>(Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD);
+	static const size_t EXPLICIT_INITIAL_INDEX_SIZE = static_cast<size_t>(Traits::EXPLICIT_INITIAL_INDEX_SIZE);
+	static const size_t IMPLICIT_INITIAL_INDEX_SIZE = static_cast<size_t>(Traits::IMPLICIT_INITIAL_INDEX_SIZE);
+	static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = static_cast<size_t>(Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE);
+	static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = static_cast<std::uint32_t>(Traits::EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE);
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable: 4307)		// + integral constant overflow (that's what the ternary expression is for!)
+#pragma warning(disable: 4309)		// static_cast: Truncation of constant value
+#endif
+	static const size_t MAX_SUBQUEUE_SIZE = (details::const_numeric_max<size_t>::value - static_cast<size_t>(Traits::MAX_SUBQUEUE_SIZE) < BLOCK_SIZE) ? details::const_numeric_max<size_t>::value : ((static_cast<size_t>(Traits::MAX_SUBQUEUE_SIZE) + (BLOCK_SIZE - 1)) / BLOCK_SIZE * BLOCK_SIZE);
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+	static_assert(!std::numeric_limits<size_t>::is_signed && std::is_integral<size_t>::value, "Traits::size_t must be an unsigned integral type");
+	static_assert(!std::numeric_limits<index_t>::is_signed && std::is_integral<index_t>::value, "Traits::index_t must be an unsigned integral type");
+	static_assert(sizeof(index_t) >= sizeof(size_t), "Traits::index_t must be at least as wide as Traits::size_t");
+	static_assert((BLOCK_SIZE > 1) && !(BLOCK_SIZE & (BLOCK_SIZE - 1)), "Traits::BLOCK_SIZE must be a power of 2 (and at least 2)");
+	static_assert((EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD > 1) && !(EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD & (EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD - 1)), "Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD must be a power of 2 (and greater than 1)");
+	static_assert((EXPLICIT_INITIAL_INDEX_SIZE > 1) && !(EXPLICIT_INITIAL_INDEX_SIZE & (EXPLICIT_INITIAL_INDEX_SIZE - 1)), "Traits::EXPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and greater than 1)");
+	static_assert((IMPLICIT_INITIAL_INDEX_SIZE > 1) && !(IMPLICIT_INITIAL_INDEX_SIZE & (IMPLICIT_INITIAL_INDEX_SIZE - 1)), "Traits::IMPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and greater than 1)");
+	static_assert((INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) || !(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE & (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE - 1)), "Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be a power of 2");
+	static_assert(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0 || INITIAL_IMPLICIT_PRODUCER_HASH_SIZE >= 1, "Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be at least 1 (or 0 to disable implicit enqueueing)");
+
+public:
+	// Creates a queue with at least `capacity` element slots; note that the
+	// actual number of elements that can be inserted without additional memory
+	// allocation depends on the number of producers and the block size (e.g. if
+	// the block size is equal to `capacity`, only a single block will be allocated
+	// up-front, which means only a single producer will be able to enqueue elements
+	// without an extra allocation -- blocks aren't shared between producers).
+	// This method is not thread safe -- it is up to the user to ensure that the
+	// queue is fully constructed before it starts being used by other threads (this
+	// includes making the memory effects of construction visible, possibly with a
+	// memory barrier).
+	explicit ConcurrentQueue(size_t capacity = 32 * BLOCK_SIZE)
+		: producerListTail(nullptr),
+		producerCount(0),
+		initialBlockPoolIndex(0),
+		nextExplicitConsumerId(0),
+		globalExplicitConsumerOffset(0)
+	{
+		implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);
+		populate_initial_implicit_producer_hash();
+		populate_initial_block_list(capacity / BLOCK_SIZE + ((capacity & (BLOCK_SIZE - 1)) == 0 ? 0 : 1));
+
+#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
+		// Track all the producers using a fully-resolved typed list for
+		// each kind; this makes it possible to debug them starting from
+		// the root queue object (otherwise wacky casts are needed that
+		// don't compile in the debugger's expression evaluator).
+		explicitProducers.store(nullptr, std::memory_order_relaxed);
+		implicitProducers.store(nullptr, std::memory_order_relaxed);
+#endif
+	}
+
+	// Computes the correct amount of pre-allocated blocks for you based
+	// on the minimum number of elements you want available at any given
+	// time, and the maximum concurrent number of each type of producer.
+	ConcurrentQueue(size_t minCapacity, size_t maxExplicitProducers, size_t maxImplicitProducers)
+		: producerListTail(nullptr),
+		producerCount(0),
+		initialBlockPoolIndex(0),
+		nextExplicitConsumerId(0),
+		globalExplicitConsumerOffset(0)
+	{
+		implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);
+		populate_initial_implicit_producer_hash();
+		size_t blocks = (((minCapacity + BLOCK_SIZE - 1) / BLOCK_SIZE) - 1) * (maxExplicitProducers + 1) + 2 * (maxExplicitProducers + maxImplicitProducers);
+		populate_initial_block_list(blocks);
+
+#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
+		explicitProducers.store(nullptr, std::memory_order_relaxed);
+		implicitProducers.store(nullptr, std::memory_order_relaxed);
+#endif
+	}
+
+	// Note: The queue should not be accessed concurrently while it's
+	// being deleted. It's up to the user to synchronize this.
+	// This method is not thread safe.
+	~ConcurrentQueue()
+	{
+		// Destroy producers
+		auto ptr = producerListTail.load(std::memory_order_relaxed);
+		while (ptr != nullptr) {
+			auto next = ptr->next_prod();
+			if (ptr->token != nullptr) {
+				ptr->token->producer = nullptr;
+			}
+			destroy(ptr);
+			ptr = next;
+		}
+
+		// Destroy implicit producer hash tables
+		MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE != 0) {
+			auto hash = implicitProducerHash.load(std::memory_order_relaxed);
+			while (hash != nullptr) {
+				auto prev = hash->prev;
+				if (prev != nullptr) {		// The last hash is part of this object and was not allocated dynamically
+					for (size_t i = 0; i != hash->capacity; ++i) {
+						hash->entries[i].~ImplicitProducerKVP();
+					}
+					hash->~ImplicitProducerHash();
+					(Traits::free)(hash);
+				}
+				hash = prev;
+			}
+		}
+
+		// Destroy global free list
+		auto block = freeList.head_unsafe();
+		while (block != nullptr) {
+			auto next = block->freeListNext.load(std::memory_order_relaxed);
+			if (block->dynamicallyAllocated) {
+				destroy(block);
+			}
+			block = next;
+		}
+
+		// Destroy initial free list
+		destroy_array(initialBlockPool, initialBlockPoolSize);
+	}
+
+	// Disable copying and copy assignment
+	ConcurrentQueue(ConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION;
+	ConcurrentQueue& operator=(ConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION;
+
+	// Moving is supported, but note that it is *not* a thread-safe operation.
+	// Nobody can use the queue while it's being moved, and the memory effects
+	// of that move must be propagated to other threads before they can use it.
+	// Note: When a queue is moved, its tokens are still valid but can only be
+	// used with the destination queue (i.e. semantically they are moved along
+	// with the queue itself).
+	ConcurrentQueue(ConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT
+		: producerListTail(other.producerListTail.load(std::memory_order_relaxed)),
+		producerCount(other.producerCount.load(std::memory_order_relaxed)),
+		initialBlockPoolIndex(other.initialBlockPoolIndex.load(std::memory_order_relaxed)),
+		initialBlockPool(other.initialBlockPool),
+		initialBlockPoolSize(other.initialBlockPoolSize),
+		freeList(std::move(other.freeList)),
+		nextExplicitConsumerId(other.nextExplicitConsumerId.load(std::memory_order_relaxed)),
+		globalExplicitConsumerOffset(other.globalExplicitConsumerOffset.load(std::memory_order_relaxed))
+	{
+		// Move the other one into this, and leave the other one as an empty queue
+		implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);
+		populate_initial_implicit_producer_hash();
+		swap_implicit_producer_hashes(other);
+
+		other.producerListTail.store(nullptr, std::memory_order_relaxed);
+		other.producerCount.store(0, std::memory_order_relaxed);
+		other.nextExplicitConsumerId.store(0, std::memory_order_relaxed);
+		other.globalExplicitConsumerOffset.store(0, std::memory_order_relaxed);
+
+#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
+		explicitProducers.store(other.explicitProducers.load(std::memory_order_relaxed), std::memory_order_relaxed);
+		other.explicitProducers.store(nullptr, std::memory_order_relaxed);
+		implicitProducers.store(other.implicitProducers.load(std::memory_order_relaxed), std::memory_order_relaxed);
+		other.implicitProducers.store(nullptr, std::memory_order_relaxed);
+#endif
+
+		other.initialBlockPoolIndex.store(0, std::memory_order_relaxed);
+		other.initialBlockPoolSize = 0;
+		other.initialBlockPool = nullptr;
+
+		reown_producers();
+	}
+
+	inline ConcurrentQueue& operator=(ConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT
+	{
+		return swap_internal(other);
+	}
+
+	// Swaps this queue's state with the other's. Not thread-safe.
+	// Swapping two queues does not invalidate their tokens, however
+	// the tokens that were created for one queue must be used with
+	// only the swapped queue (i.e. the tokens are tied to the
+	// queue's movable state, not the object itself).
+	inline void swap(ConcurrentQueue& other) MOODYCAMEL_NOEXCEPT
+	{
+		swap_internal(other);
+	}
+
+private:
+	ConcurrentQueue& swap_internal(ConcurrentQueue& other)
+	{
+		if (this == &other) {
+			return *this;
+		}
+
+		details::swap_relaxed(producerListTail, other.producerListTail);
+		details::swap_relaxed(producerCount, other.producerCount);
+		details::swap_relaxed(initialBlockPoolIndex, other.initialBlockPoolIndex);
+		std::swap(initialBlockPool, other.initialBlockPool);
+		std::swap(initialBlockPoolSize, other.initialBlockPoolSize);
+		freeList.swap(other.freeList);
+		details::swap_relaxed(nextExplicitConsumerId, other.nextExplicitConsumerId);
+		details::swap_relaxed(globalExplicitConsumerOffset, other.globalExplicitConsumerOffset);
+
+		swap_implicit_producer_hashes(other);
+
+		reown_producers();
+		other.reown_producers();
+
+#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
+		details::swap_relaxed(explicitProducers, other.explicitProducers);
+		details::swap_relaxed(implicitProducers, other.implicitProducers);
+#endif
+
+		return *this;
+	}
+
+public:
+	// Enqueues a single item (by copying it).
+	// Allocates memory if required. Only fails if memory allocation fails (or implicit
+	// production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0,
+	// or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
+	// Thread-safe.
+	inline bool enqueue(T const& item)
+	{
+		MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
+		else return inner_enqueue<CanAlloc>(item);
+	}
+
+	// Enqueues a single item (by moving it, if possible).
+	// Allocates memory if required. Only fails if memory allocation fails (or implicit
+	// production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0,
+	// or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
+	// Thread-safe.
+	inline bool enqueue(T&& item)
+	{
+		MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
+		else return inner_enqueue<CanAlloc>(std::move(item));
+	}
+
+	// Enqueues a single item (by copying it) using an explicit producer token.
+	// Allocates memory if required. Only fails if memory allocation fails (or
+	// Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
+	// Thread-safe.
+	inline bool enqueue(producer_token_t const& token, T const& item)
+	{
+		return inner_enqueue<CanAlloc>(token, item);
+	}
+
+	// Enqueues a single item (by moving it, if possible) using an explicit producer token.
+	// Allocates memory if required. Only fails if memory allocation fails (or
+	// Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
+	// Thread-safe.
+	inline bool enqueue(producer_token_t const& token, T&& item)
+	{
+		return inner_enqueue<CanAlloc>(token, std::move(item));
+	}
+
+	// Enqueues several items.
+	// Allocates memory if required. Only fails if memory allocation fails (or
+	// implicit production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE
+	// is 0, or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
+	// Note: Use std::make_move_iterator if the elements should be moved instead of copied.
+	// Thread-safe.
+	template<typename It>
+	bool enqueue_bulk(It itemFirst, size_t count)
+	{
+		MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
+		else return inner_enqueue_bulk<CanAlloc>(itemFirst, count);
+	}
+
+	// Enqueues several items using an explicit producer token.
+	// Allocates memory if required. Only fails if memory allocation fails
+	// (or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
+	// Note: Use std::make_move_iterator if the elements should be moved
+	// instead of copied.
+	// Thread-safe.
+	template<typename It>
+	bool enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count)
+	{
+		return inner_enqueue_bulk<CanAlloc>(token, itemFirst, count);
+	}
+
+	// Enqueues a single item (by copying it).
+	// Does not allocate memory. Fails if not enough room to enqueue (or implicit
+	// production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE
+	// is 0).
+	// Thread-safe.
+	inline bool try_enqueue(T const& item)
+	{
+		MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
+		else return inner_enqueue<CannotAlloc>(item);
+	}
+
+	// Enqueues a single item (by moving it, if possible).
+	// Does not allocate memory (except for one-time implicit producer).
+	// Fails if not enough room to enqueue (or implicit production is
+	// disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0).
+	// Thread-safe.
+	inline bool try_enqueue(T&& item)
+	{
+		MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
+		else return inner_enqueue<CannotAlloc>(std::move(item));
+	}
+
+	// Enqueues a single item (by copying it) using an explicit producer token.
+	// Does not allocate memory. Fails if not enough room to enqueue.
+	// Thread-safe.
+	inline bool try_enqueue(producer_token_t const& token, T const& item)
+	{
+		return inner_enqueue<CannotAlloc>(token, item);
+	}
+
+	// Enqueues a single item (by moving it, if possible) using an explicit producer token.
+	// Does not allocate memory. Fails if not enough room to enqueue.
+	// Thread-safe.
+	inline bool try_enqueue(producer_token_t const& token, T&& item)
+	{
+		return inner_enqueue<CannotAlloc>(token, std::move(item));
+	}
+
+	// Enqueues several items.
+	// Does not allocate memory (except for one-time implicit producer).
+	// Fails if not enough room to enqueue (or implicit production is
+	// disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0).
+	// Note: Use std::make_move_iterator if the elements should be moved
+	// instead of copied.
+	// Thread-safe.
+	template<typename It>
+	bool try_enqueue_bulk(It itemFirst, size_t count)
+	{
+		MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
+		else return inner_enqueue_bulk<CannotAlloc>(itemFirst, count);
+	}
+
+	// Enqueues several items using an explicit producer token.
+	// Does not allocate memory. Fails if not enough room to enqueue.
+	// Note: Use std::make_move_iterator if the elements should be moved
+	// instead of copied.
+	// Thread-safe.
+	template<typename It>
+	bool try_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count)
+	{
+		return inner_enqueue_bulk<CannotAlloc>(token, itemFirst, count);
+	}
+
+
+
+	// Attempts to dequeue from the queue.
+	// Returns false if all producer streams appeared empty at the time they
+	// were checked (so, the queue is likely but not guaranteed to be empty).
+	// Never allocates. Thread-safe.
+	template<typename U>
+	bool try_dequeue(U& item)
+	{
+		// Instead of simply trying each producer in turn (which could cause needless contention on the first
+		// producer), we score them heuristically.
+		size_t nonEmptyCount = 0;
+		ProducerBase* best = nullptr;
+		size_t bestSize = 0;
+		for (auto ptr = producerListTail.load(std::memory_order_acquire); nonEmptyCount < 3 && ptr != nullptr; ptr = ptr->next_prod()) {
+			auto size = ptr->size_approx();
+			if (size > 0) {
+				if (size > bestSize) {
+					bestSize = size;
+					best = ptr;
+				}
+				++nonEmptyCount;
+			}
+		}
+
+		// If there was at least one non-empty queue but it appears empty at the time
+		// we try to dequeue from it, we need to make sure every queue's been tried
+		if (nonEmptyCount > 0) {
+			if ((details::likely)(best->dequeue(item))) {
+				return true;
+			}
+			for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) {
+				if (ptr != best && ptr->dequeue(item)) {
+					return true;
+				}
+			}
+		}
+		return false;
+	}
+
+	// Attempts to dequeue from the queue.
+	// Returns false if all producer streams appeared empty at the time they
+	// were checked (so, the queue is likely but not guaranteed to be empty).
+	// This differs from the try_dequeue(item) method in that this one does
+	// not attempt to reduce contention by interleaving the order that producer
+	// streams are dequeued from. So, using this method can reduce overall throughput
+	// under contention, but will give more predictable results in single-threaded
+	// consumer scenarios. This is mostly only useful for internal unit tests.
+	// Never allocates. Thread-safe.
+	template<typename U>
+	bool try_dequeue_non_interleaved(U& item)
+	{
+		for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) {
+			if (ptr->dequeue(item)) {
+				return true;
+			}
+		}
+		return false;
+	}
+
+	// Attempts to dequeue from the queue using an explicit consumer token.
+	// Returns false if all producer streams appeared empty at the time they
+	// were checked (so, the queue is likely but not guaranteed to be empty).
+	// Never allocates. Thread-safe.
+	template<typename U>
+	bool try_dequeue(consumer_token_t& token, U& item)
+	{
+		// The idea is roughly as follows:
+		// Every 256 items from one producer, make everyone rotate (increase the global offset) -> this means the highest efficiency consumer dictates the rotation speed of everyone else, more or less
+		// If you see that the global offset has changed, you must reset your consumption counter and move to your designated place
+		// If there's no items where you're supposed to be, keep moving until you find a producer with some items
+		// If the global offset has not changed but you've run out of items to consume, move over from your current position until you find an producer with something in it
+
+		if (token.desiredProducer == nullptr || token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) {
+			if (!update_current_producer_after_rotation(token)) {
+				return false;
+			}
+		}
+
+		// If there was at least one non-empty queue but it appears empty at the time
+		// we try to dequeue from it, we need to make sure every queue's been tried
+		if (static_cast<ProducerBase*>(token.currentProducer)->dequeue(item)) {
+			if (++token.itemsConsumedFromCurrent == EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) {
+				globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed);
+			}
+			return true;
+		}
+
+		auto tail = producerListTail.load(std::memory_order_acquire);
+		auto ptr = static_cast<ProducerBase*>(token.currentProducer)->next_prod();
+		if (ptr == nullptr) {
+			ptr = tail;
+		}
+		while (ptr != static_cast<ProducerBase*>(token.currentProducer)) {
+			if (ptr->dequeue(item)) {
+				token.currentProducer = ptr;
+				token.itemsConsumedFromCurrent = 1;
+				return true;
+			}
+			ptr = ptr->next_prod();
+			if (ptr == nullptr) {
+				ptr = tail;
+			}
+		}
+		return false;
+	}
+
+	// Attempts to dequeue several elements from the queue.
+	// Returns the number of items actually dequeued.
+	// Returns 0 if all producer streams appeared empty at the time they
+	// were checked (so, the queue is likely but not guaranteed to be empty).
+	// Never allocates. Thread-safe.
+	template<typename It>
+	size_t try_dequeue_bulk(It itemFirst, size_t max)
+	{
+		size_t count = 0;
+		for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) {
+			count += ptr->dequeue_bulk(itemFirst, max - count);
+			if (count == max) {
+				break;
+			}
+		}
+		return count;
+	}
+
+	// Attempts to dequeue several elements from the queue using an explicit consumer token.
+	// Returns the number of items actually dequeued.
+	// Returns 0 if all producer streams appeared empty at the time they
+	// were checked (so, the queue is likely but not guaranteed to be empty).
+	// Never allocates. Thread-safe.
+	template<typename It>
+	size_t try_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max)
+	{
+		if (token.desiredProducer == nullptr || token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) {
+			if (!update_current_producer_after_rotation(token)) {
+				return 0;
+			}
+		}
+
+		size_t count = static_cast<ProducerBase*>(token.currentProducer)->dequeue_bulk(itemFirst, max);
+		if (count == max) {
+			if ((token.itemsConsumedFromCurrent += static_cast<std::uint32_t>(max)) >= EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) {
+				globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed);
+			}
+			return max;
+		}
+		token.itemsConsumedFromCurrent += static_cast<std::uint32_t>(count);
+		max -= count;
+
+		auto tail = producerListTail.load(std::memory_order_acquire);
+		auto ptr = static_cast<ProducerBase*>(token.currentProducer)->next_prod();
+		if (ptr == nullptr) {
+			ptr = tail;
+		}
+		while (ptr != static_cast<ProducerBase*>(token.currentProducer)) {
+			auto dequeued = ptr->dequeue_bulk(itemFirst, max);
+			count += dequeued;
+			if (dequeued != 0) {
+				token.currentProducer = ptr;
+				token.itemsConsumedFromCurrent = static_cast<std::uint32_t>(dequeued);
+			}
+			if (dequeued == max) {
+				break;
+			}
+			max -= dequeued;
+			ptr = ptr->next_prod();
+			if (ptr == nullptr) {
+				ptr = tail;
+			}
+		}
+		return count;
+	}
+
+
+
+	// Attempts to dequeue from a specific producer's inner queue.
+	// If you happen to know which producer you want to dequeue from, this
+	// is significantly faster than using the general-case try_dequeue methods.
+	// Returns false if the producer's queue appeared empty at the time it
+	// was checked (so, the queue is likely but not guaranteed to be empty).
+	// Never allocates. Thread-safe.
+	template<typename U>
+	inline bool try_dequeue_from_producer(producer_token_t const& producer, U& item)
+	{
+		return static_cast<ExplicitProducer*>(producer.producer)->dequeue(item);
+	}
+
+	// Attempts to dequeue several elements from a specific producer's inner queue.
+	// Returns the number of items actually dequeued.
+	// If you happen to know which producer you want to dequeue from, this
+	// is significantly faster than using the general-case try_dequeue methods.
+	// Returns 0 if the producer's queue appeared empty at the time it
+	// was checked (so, the queue is likely but not guaranteed to be empty).
+	// Never allocates. Thread-safe.
+	template<typename It>
+	inline size_t try_dequeue_bulk_from_producer(producer_token_t const& producer, It itemFirst, size_t max)
+	{
+		return static_cast<ExplicitProducer*>(producer.producer)->dequeue_bulk(itemFirst, max);
+	}
+
+
+	// Returns an estimate of the total number of elements currently in the queue. This
+	// estimate is only accurate if the queue has completely stabilized before it is called
+	// (i.e. all enqueue and dequeue operations have completed and their memory effects are
+	// visible on the calling thread, and no further operations start while this method is
+	// being called).
+	// Thread-safe.
+	size_t size_approx() const
+	{
+		size_t size = 0;
+		for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) {
+			size += ptr->size_approx();
+		}
+		return size;
+	}
+
+
+	// Returns true if the underlying atomic variables used by
+	// the queue are lock-free (they should be on most platforms).
+	// Thread-safe.
+	static constexpr bool is_lock_free()
+	{
+		return
+			details::static_is_lock_free<bool>::value == 2 &&
+			details::static_is_lock_free<size_t>::value == 2 &&
+			details::static_is_lock_free<std::uint32_t>::value == 2 &&
+			details::static_is_lock_free<index_t>::value == 2 &&
+			details::static_is_lock_free<void*>::value == 2 &&
+			details::static_is_lock_free<typename details::thread_id_converter<details::thread_id_t>::thread_id_numeric_size_t>::value == 2;
+	}
+
+
+private:
+	friend struct ProducerToken;
+	friend struct ConsumerToken;
+	struct ExplicitProducer;
+	friend struct ExplicitProducer;
+	struct ImplicitProducer;
+	friend struct ImplicitProducer;
+	friend class ConcurrentQueueTests;
+
+	enum AllocationMode { CanAlloc, CannotAlloc };
+
+
+	///////////////////////////////
+	// Queue methods
+	///////////////////////////////
+
+	template<AllocationMode canAlloc, typename U>
+	inline bool inner_enqueue(producer_token_t const& token, U&& element)
+	{
+		return static_cast<ExplicitProducer*>(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue<canAlloc>(std::forward<U>(element));
+	}
+
+	template<AllocationMode canAlloc, typename U>
+	inline bool inner_enqueue(U&& element)
+	{
+		auto producer = get_or_add_implicit_producer();
+		return producer == nullptr ? false : producer->ConcurrentQueue::ImplicitProducer::template enqueue<canAlloc>(std::forward<U>(element));
+	}
+
+	template<AllocationMode canAlloc, typename It>
+	inline bool inner_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count)
+	{
+		return static_cast<ExplicitProducer*>(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue_bulk<canAlloc>(itemFirst, count);
+	}
+
+	template<AllocationMode canAlloc, typename It>
+	inline bool inner_enqueue_bulk(It itemFirst, size_t count)
+	{
+		auto producer = get_or_add_implicit_producer();
+		return producer == nullptr ? false : producer->ConcurrentQueue::ImplicitProducer::template enqueue_bulk<canAlloc>(itemFirst, count);
+	}
+
+	inline bool update_current_producer_after_rotation(consumer_token_t& token)
+	{
+		// Ah, there's been a rotation, figure out where we should be!
+		auto tail = producerListTail.load(std::memory_order_acquire);
+		if (token.desiredProducer == nullptr && tail == nullptr) {
+			return false;
+		}
+		auto prodCount = producerCount.load(std::memory_order_relaxed);
+		auto globalOffset = globalExplicitConsumerOffset.load(std::memory_order_relaxed);
+		if ((details::unlikely)(token.desiredProducer == nullptr)) {
+			// Aha, first time we're dequeueing anything.
+			// Figure out our local position
+			// Note: offset is from start, not end, but we're traversing from end -- subtract from count first
+			std::uint32_t offset = prodCount - 1 - (token.initialOffset % prodCount);
+			token.desiredProducer = tail;
+			for (std::uint32_t i = 0; i != offset; ++i) {
+				token.desiredProducer = static_cast<ProducerBase*>(token.desiredProducer)->next_prod();
+				if (token.desiredProducer == nullptr) {
+					token.desiredProducer = tail;
+				}
+			}
+		}
+
+		std::uint32_t delta = globalOffset - token.lastKnownGlobalOffset;
+		if (delta >= prodCount) {
+			delta = delta % prodCount;
+		}
+		for (std::uint32_t i = 0; i != delta; ++i) {
+			token.desiredProducer = static_cast<ProducerBase*>(token.desiredProducer)->next_prod();
+			if (token.desiredProducer == nullptr) {
+				token.desiredProducer = tail;
+			}
+		}
+
+		token.lastKnownGlobalOffset = globalOffset;
+		token.currentProducer = token.desiredProducer;
+		token.itemsConsumedFromCurrent = 0;
+		return true;
+	}
+
+
+	///////////////////////////
+	// Free list
+	///////////////////////////
+
+	template <typename N>
+	struct FreeListNode
+	{
+		FreeListNode() : freeListRefs(0), freeListNext(nullptr) { }
+
+		std::atomic<std::uint32_t> freeListRefs;
+		std::atomic<N*> freeListNext;
+	};
+
+	// A simple CAS-based lock-free free list. Not the fastest thing in the world under heavy contention, but
+	// simple and correct (assuming nodes are never freed until after the free list is destroyed), and fairly
+	// speedy under low contention.
+	template<typename N>		// N must inherit FreeListNode or have the same fields (and initialization of them)
+	struct FreeList
+	{
+		FreeList() : freeListHead(nullptr) { }
+		FreeList(FreeList&& other) : freeListHead(other.freeListHead.load(std::memory_order_relaxed)) { other.freeListHead.store(nullptr, std::memory_order_relaxed); }
+		void swap(FreeList& other) { details::swap_relaxed(freeListHead, other.freeListHead); }
+
+		FreeList(FreeList const&) MOODYCAMEL_DELETE_FUNCTION;
+		FreeList& operator=(FreeList const&) MOODYCAMEL_DELETE_FUNCTION;
+
+		inline void add(N* node)
+		{
+#ifdef MCDBGQ_NOLOCKFREE_FREELIST
+			debug::DebugLock lock(mutex);
+#endif
+			// We know that the should-be-on-freelist bit is 0 at this point, so it's safe to
+			// set it using a fetch_add
+			if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST, std::memory_order_acq_rel) == 0) {
+				// Oh look! We were the last ones referencing this node, and we know
+				// we want to add it to the free list, so let's do it!
+		 		add_knowing_refcount_is_zero(node);
+			}
+		}
+
+		inline N* try_get()
+		{
+#ifdef MCDBGQ_NOLOCKFREE_FREELIST
+			debug::DebugLock lock(mutex);
+#endif
+			auto head = freeListHead.load(std::memory_order_acquire);
+			while (head != nullptr) {
+				auto prevHead = head;
+				auto refs = head->freeListRefs.load(std::memory_order_relaxed);
+				if ((refs & REFS_MASK) == 0 || !head->freeListRefs.compare_exchange_strong(refs, refs + 1, std::memory_order_acquire, std::memory_order_relaxed)) {
+					head = freeListHead.load(std::memory_order_acquire);
+					continue;
+				}
+
+				// Good, reference count has been incremented (it wasn't at zero), which means we can read the
+				// next and not worry about it changing between now and the time we do the CAS
+				auto next = head->freeListNext.load(std::memory_order_relaxed);
+				if (freeListHead.compare_exchange_strong(head, next, std::memory_order_acquire, std::memory_order_relaxed)) {
+					// Yay, got the node. This means it was on the list, which means shouldBeOnFreeList must be false no
+					// matter the refcount (because nobody else knows it's been taken off yet, it can't have been put back on).
+					assert((head->freeListRefs.load(std::memory_order_relaxed) & SHOULD_BE_ON_FREELIST) == 0);
+
+					// Decrease refcount twice, once for our ref, and once for the list's ref
+					head->freeListRefs.fetch_sub(2, std::memory_order_release);
+					return head;
+				}
+
+				// OK, the head must have changed on us, but we still need to decrease the refcount we increased.
+				// Note that we don't need to release any memory effects, but we do need to ensure that the reference
+				// count decrement happens-after the CAS on the head.
+				refs = prevHead->freeListRefs.fetch_sub(1, std::memory_order_acq_rel);
+				if (refs == SHOULD_BE_ON_FREELIST + 1) {
+					add_knowing_refcount_is_zero(prevHead);
+				}
+			}
+
+			return nullptr;
+		}
+
+		// Useful for traversing the list when there's no contention (e.g. to destroy remaining nodes)
+		N* head_unsafe() const { return freeListHead.load(std::memory_order_relaxed); }
+
+	private:
+		inline void add_knowing_refcount_is_zero(N* node)
+		{
+			// Since the refcount is zero, and nobody can increase it once it's zero (except us, and we run
+			// only one copy of this method per node at a time, i.e. the single thread case), then we know
+			// we can safely change the next pointer of the node; however, once the refcount is back above
+			// zero, then other threads could increase it (happens under heavy contention, when the refcount
+			// goes to zero in between a load and a refcount increment of a node in try_get, then back up to
+			// something non-zero, then the refcount increment is done by the other thread) -- so, if the CAS
+			// to add the node to the actual list fails, decrease the refcount and leave the add operation to
+			// the next thread who puts the refcount back at zero (which could be us, hence the loop).
+			auto head = freeListHead.load(std::memory_order_relaxed);
+			while (true) {
+				node->freeListNext.store(head, std::memory_order_relaxed);
+				node->freeListRefs.store(1, std::memory_order_release);
+				if (!freeListHead.compare_exchange_strong(head, node, std::memory_order_release, std::memory_order_relaxed)) {
+					// Hmm, the add failed, but we can only try again when the refcount goes back to zero
+					if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST - 1, std::memory_order_release) == 1) {
+						continue;
+					}
+				}
+				return;
+			}
+		}
+
+	private:
+		// Implemented like a stack, but where node order doesn't matter (nodes are inserted out of order under contention)
+		std::atomic<N*> freeListHead;
+
+	static const std::uint32_t REFS_MASK = 0x7FFFFFFF;
+	static const std::uint32_t SHOULD_BE_ON_FREELIST = 0x80000000;
+
+#ifdef MCDBGQ_NOLOCKFREE_FREELIST
+		debug::DebugMutex mutex;
+#endif
+	};
+
+
+	///////////////////////////
+	// Block
+	///////////////////////////
+
+	enum InnerQueueContext { implicit_context = 0, explicit_context = 1 };
+
+	struct Block
+	{
+		Block()
+			: next(nullptr), elementsCompletelyDequeued(0), freeListRefs(0), freeListNext(nullptr), dynamicallyAllocated(true)
+		{
+#ifdef MCDBGQ_TRACKMEM
+			owner = nullptr;
+#endif
+		}
+
+		template<InnerQueueContext context>
+		inline bool is_empty() const
+		{
+			MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
+				// Check flags
+				for (size_t i = 0; i < BLOCK_SIZE; ++i) {
+					if (!emptyFlags[i].load(std::memory_order_relaxed)) {
+						return false;
+					}
+				}
+
+				// Aha, empty; make sure we have all other memory effects that happened before the empty flags were set
+				std::atomic_thread_fence(std::memory_order_acquire);
+				return true;
+			}
+			else {
+				// Check counter
+				if (elementsCompletelyDequeued.load(std::memory_order_relaxed) == BLOCK_SIZE) {
+					std::atomic_thread_fence(std::memory_order_acquire);
+					return true;
+				}
+				assert(elementsCompletelyDequeued.load(std::memory_order_relaxed) <= BLOCK_SIZE);
+				return false;
+			}
+		}
+
+		// Returns true if the block is now empty (does not apply in explicit context)
+		template<InnerQueueContext context>
+		inline bool set_empty(MOODYCAMEL_MAYBE_UNUSED index_t i)
+		{
+			MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
+				// Set flag
+				assert(!emptyFlags[BLOCK_SIZE - 1 - static_cast<size_t>(i & static_cast<index_t>(BLOCK_SIZE - 1))].load(std::memory_order_relaxed));
+				emptyFlags[BLOCK_SIZE - 1 - static_cast<size_t>(i & static_cast<index_t>(BLOCK_SIZE - 1))].store(true, std::memory_order_release);
+				return false;
+			}
+			else {
+				// Increment counter
+				auto prevVal = elementsCompletelyDequeued.fetch_add(1, std::memory_order_release);
+				assert(prevVal < BLOCK_SIZE);
+				return prevVal == BLOCK_SIZE - 1;
+			}
+		}
+
+		// Sets multiple contiguous item statuses to 'empty' (assumes no wrapping and count > 0).
+		// Returns true if the block is now empty (does not apply in explicit context).
+		template<InnerQueueContext context>
+		inline bool set_many_empty(MOODYCAMEL_MAYBE_UNUSED index_t i, size_t count)
+		{
+			MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
+				// Set flags
+				std::atomic_thread_fence(std::memory_order_release);
+				i = BLOCK_SIZE - 1 - static_cast<size_t>(i & static_cast<index_t>(BLOCK_SIZE - 1)) - count + 1;
+				for (size_t j = 0; j != count; ++j) {
+					assert(!emptyFlags[i + j].load(std::memory_order_relaxed));
+					emptyFlags[i + j].store(true, std::memory_order_relaxed);
+				}
+				return false;
+			}
+			else {
+				// Increment counter
+				auto prevVal = elementsCompletelyDequeued.fetch_add(count, std::memory_order_release);
+				assert(prevVal + count <= BLOCK_SIZE);
+				return prevVal + count == BLOCK_SIZE;
+			}
+		}
+
+		template<InnerQueueContext context>
+		inline void set_all_empty()
+		{
+			MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
+				// Set all flags
+				for (size_t i = 0; i != BLOCK_SIZE; ++i) {
+					emptyFlags[i].store(true, std::memory_order_relaxed);
+				}
+			}
+			else {
+				// Reset counter
+				elementsCompletelyDequeued.store(BLOCK_SIZE, std::memory_order_relaxed);
+			}
+		}
+
+		template<InnerQueueContext context>
+		inline void reset_empty()
+		{
+			MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
+				// Reset flags
+				for (size_t i = 0; i != BLOCK_SIZE; ++i) {
+					emptyFlags[i].store(false, std::memory_order_relaxed);
+				}
+			}
+			else {
+				// Reset counter
+				elementsCompletelyDequeued.store(0, std::memory_order_relaxed);
+			}
+		}
+
+		inline T* operator[](index_t idx) MOODYCAMEL_NOEXCEPT { return static_cast<T*>(static_cast<void*>(elements)) + static_cast<size_t>(idx & static_cast<index_t>(BLOCK_SIZE - 1)); }
+		inline T const* operator[](index_t idx) const MOODYCAMEL_NOEXCEPT { return static_cast<T const*>(static_cast<void const*>(elements)) + static_cast<size_t>(idx & static_cast<index_t>(BLOCK_SIZE - 1)); }
+
+	private:
+		static_assert(std::alignment_of<T>::value <= sizeof(T), "The queue does not support types with an alignment greater than their size at this time");
+		MOODYCAMEL_ALIGNED_TYPE_LIKE(char[sizeof(T) * BLOCK_SIZE], T) elements;
+	public:
+		Block* next;
+		std::atomic<size_t> elementsCompletelyDequeued;
+		std::atomic<bool> emptyFlags[BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD ? BLOCK_SIZE : 1];
+	public:
+		std::atomic<std::uint32_t> freeListRefs;
+		std::atomic<Block*> freeListNext;
+		bool dynamicallyAllocated;		// Perhaps a better name for this would be 'isNotPartOfInitialBlockPool'
+
+#ifdef MCDBGQ_TRACKMEM
+		void* owner;
+#endif
+	};
+	static_assert(std::alignment_of<Block>::value >= std::alignment_of<T>::value, "Internal error: Blocks must be at least as aligned as the type they are wrapping");
+
+
+#ifdef MCDBGQ_TRACKMEM
+public:
+	struct MemStats;
+private:
+#endif
+
+	///////////////////////////
+	// Producer base
+	///////////////////////////
+
+	struct ProducerBase : public details::ConcurrentQueueProducerTypelessBase
+	{
+		ProducerBase(ConcurrentQueue* parent_, bool isExplicit_) :
+			tailIndex(0),
+			headIndex(0),
+			dequeueOptimisticCount(0),
+			dequeueOvercommit(0),
+			tailBlock(nullptr),
+			isExplicit(isExplicit_),
+			parent(parent_)
+		{
+		}
+
+		virtual ~ProducerBase() { }
+
+		template<typename U>
+		inline bool dequeue(U& element)
+		{
+			if (isExplicit) {
+				return static_cast<ExplicitProducer*>(this)->dequeue(element);
+			}
+			else {
+				return static_cast<ImplicitProducer*>(this)->dequeue(element);
+			}
+		}
+
+		template<typename It>
+		inline size_t dequeue_bulk(It& itemFirst, size_t max)
+		{
+			if (isExplicit) {
+				return static_cast<ExplicitProducer*>(this)->dequeue_bulk(itemFirst, max);
+			}
+			else {
+				return static_cast<ImplicitProducer*>(this)->dequeue_bulk(itemFirst, max);
+			}
+		}
+
+		inline ProducerBase* next_prod() const { return static_cast<ProducerBase*>(next); }
+
+		inline size_t size_approx() const
+		{
+			auto tail = tailIndex.load(std::memory_order_relaxed);
+			auto head = headIndex.load(std::memory_order_relaxed);
+			return details::circular_less_than(head, tail) ? static_cast<size_t>(tail - head) : 0;
+		}
+
+		inline index_t getTail() const { return tailIndex.load(std::memory_order_relaxed); }
+	protected:
+		std::atomic<index_t> tailIndex;		// Where to enqueue to next
+		std::atomic<index_t> headIndex;		// Where to dequeue from next
+
+		std::atomic<index_t> dequeueOptimisticCount;
+		std::atomic<index_t> dequeueOvercommit;
+
+		Block* tailBlock;
+
+	public:
+		bool isExplicit;
+		ConcurrentQueue* parent;
+
+	protected:
+#ifdef MCDBGQ_TRACKMEM
+		friend struct MemStats;
+#endif
+	};
+
+
+	///////////////////////////
+	// Explicit queue
+	///////////////////////////
+
+	struct ExplicitProducer : public ProducerBase
+	{
+		explicit ExplicitProducer(ConcurrentQueue* parent_) :
+			ProducerBase(parent_, true),
+			blockIndex(nullptr),
+			pr_blockIndexSlotsUsed(0),
+			pr_blockIndexSize(EXPLICIT_INITIAL_INDEX_SIZE >> 1),
+			pr_blockIndexFront(0),
+			pr_blockIndexEntries(nullptr),
+			pr_blockIndexRaw(nullptr)
+		{
+			size_t poolBasedIndexSize = details::ceil_to_pow_2(parent_->initialBlockPoolSize) >> 1;
+			if (poolBasedIndexSize > pr_blockIndexSize) {
+				pr_blockIndexSize = poolBasedIndexSize;
+			}
+
+			new_block_index(0);		// This creates an index with double the number of current entries, i.e. EXPLICIT_INITIAL_INDEX_SIZE
+		}
+
+		~ExplicitProducer()
+		{
+			// Destruct any elements not yet dequeued.
+			// Since we're in the destructor, we can assume all elements
+			// are either completely dequeued or completely not (no halfways).
+			if (this->tailBlock != nullptr) {		// Note this means there must be a block index too
+				// First find the block that's partially dequeued, if any
+				Block* halfDequeuedBlock = nullptr;
+				if ((this->headIndex.load(std::memory_order_relaxed) & static_cast<index_t>(BLOCK_SIZE - 1)) != 0) {
+					// The head's not on a block boundary, meaning a block somewhere is partially dequeued
+					// (or the head block is the tail block and was fully dequeued, but the head/tail are still not on a boundary)
+					size_t i = (pr_blockIndexFront - pr_blockIndexSlotsUsed) & (pr_blockIndexSize - 1);
+					while (details::circular_less_than<index_t>(pr_blockIndexEntries[i].base + BLOCK_SIZE, this->headIndex.load(std::memory_order_relaxed))) {
+						i = (i + 1) & (pr_blockIndexSize - 1);
+					}
+					assert(details::circular_less_than<index_t>(pr_blockIndexEntries[i].base, this->headIndex.load(std::memory_order_relaxed)));
+					halfDequeuedBlock = pr_blockIndexEntries[i].block;
+				}
+
+				// Start at the head block (note the first line in the loop gives us the head from the tail on the first iteration)
+				auto block = this->tailBlock;
+				do {
+					block = block->next;
+					if (block->ConcurrentQueue::Block::template is_empty<explicit_context>()) {
+						continue;
+					}
+
+					size_t i = 0;	// Offset into block
+					if (block == halfDequeuedBlock) {
+						i = static_cast<size_t>(this->headIndex.load(std::memory_order_relaxed) & static_cast<index_t>(BLOCK_SIZE - 1));
+					}
+
+					// Walk through all the items in the block; if this is the tail block, we need to stop when we reach the tail index
+					auto lastValidIndex = (this->tailIndex.load(std::memory_order_relaxed) & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 ? BLOCK_SIZE : static_cast<size_t>(this->tailIndex.load(std::memory_order_relaxed) & static_cast<index_t>(BLOCK_SIZE - 1));
+					while (i != BLOCK_SIZE && (block != this->tailBlock || i != lastValidIndex)) {
+						(*block)[i++]->~T();
+					}
+				} while (block != this->tailBlock);
+			}
+
+			// Destroy all blocks that we own
+			if (this->tailBlock != nullptr) {
+				auto block = this->tailBlock;
+				do {
+					auto nextBlock = block->next;
+					this->parent->add_block_to_free_list(block);
+					block = nextBlock;
+				} while (block != this->tailBlock);
+			}
+
+			// Destroy the block indices
+			auto header = static_cast<BlockIndexHeader*>(pr_blockIndexRaw);
+			while (header != nullptr) {
+				auto prev = static_cast<BlockIndexHeader*>(header->prev);
+				header->~BlockIndexHeader();
+				(Traits::free)(header);
+				header = prev;
+			}
+		}
+
+		template<AllocationMode allocMode, typename U>
+		inline bool enqueue(U&& element)
+		{
+			index_t currentTailIndex = this->tailIndex.load(std::memory_order_relaxed);
+			index_t newTailIndex = 1 + currentTailIndex;
+			if ((currentTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0) {
+				// We reached the end of a block, start a new one
+				auto startBlock = this->tailBlock;
+				auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed;
+				if (this->tailBlock != nullptr && this->tailBlock->next->ConcurrentQueue::Block::template is_empty<explicit_context>()) {
+					// We can re-use the block ahead of us, it's empty!
+					this->tailBlock = this->tailBlock->next;
+					this->tailBlock->ConcurrentQueue::Block::template reset_empty<explicit_context>();
+
+					// We'll put the block on the block index (guaranteed to be room since we're conceptually removing the
+					// last block from it first -- except instead of removing then adding, we can just overwrite).
+					// Note that there must be a valid block index here, since even if allocation failed in the ctor,
+					// it would have been re-attempted when adding the first block to the queue; since there is such
+					// a block, a block index must have been successfully allocated.
+				}
+				else {
+					// Whatever head value we see here is >= the last value we saw here (relatively),
+					// and <= its current value. Since we have the most recent tail, the head must be
+					// <= to it.
+					auto head = this->headIndex.load(std::memory_order_relaxed);
+					assert(!details::circular_less_than<index_t>(currentTailIndex, head));
+					if (!details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE)
+						|| (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) {
+						// We can't enqueue in another block because there's not enough leeway -- the
+						// tail could surpass the head by the time the block fills up! (Or we'll exceed
+						// the size limit, if the second part of the condition was true.)
+						return false;
+					}
+					// We're going to need a new block; check that the block index has room
+					if (pr_blockIndexRaw == nullptr || pr_blockIndexSlotsUsed == pr_blockIndexSize) {
+						// Hmm, the circular block index is already full -- we'll need
+						// to allocate a new index. Note pr_blockIndexRaw can only be nullptr if
+						// the initial allocation failed in the constructor.
+
+						MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) {
+							return false;
+						}
+						else if (!new_block_index(pr_blockIndexSlotsUsed)) {
+							return false;
+						}
+					}
+
+					// Insert a new block in the circular linked list
+					auto newBlock = this->parent->ConcurrentQueue::template requisition_block<allocMode>();
+					if (newBlock == nullptr) {
+						return false;
+					}
+#ifdef MCDBGQ_TRACKMEM
+					newBlock->owner = this;
+#endif
+					newBlock->ConcurrentQueue::Block::template reset_empty<explicit_context>();
+					if (this->tailBlock == nullptr) {
+						newBlock->next = newBlock;
+					}
+					else {
+						newBlock->next = this->tailBlock->next;
+						this->tailBlock->next = newBlock;
+					}
+					this->tailBlock = newBlock;
+					++pr_blockIndexSlotsUsed;
+				}
+
+				MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast<T*>(nullptr)) T(std::forward<U>(element)))) {
+					// The constructor may throw. We want the element not to appear in the queue in
+					// that case (without corrupting the queue):
+					MOODYCAMEL_TRY {
+						new ((*this->tailBlock)[currentTailIndex]) T(std::forward<U>(element));
+					}
+					MOODYCAMEL_CATCH (...) {
+						// Revert change to the current block, but leave the new block available
+						// for next time
+						pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;
+						this->tailBlock = startBlock == nullptr ? this->tailBlock : startBlock;
+						MOODYCAMEL_RETHROW;
+					}
+				}
+				else {
+					(void)startBlock;
+					(void)originalBlockIndexSlotsUsed;
+				}
+
+				// Add block to block index
+				auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront];
+				entry.base = currentTailIndex;
+				entry.block = this->tailBlock;
+				blockIndex.load(std::memory_order_relaxed)->front.store(pr_blockIndexFront, std::memory_order_release);
+				pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1);
+
+				MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast<T*>(nullptr)) T(std::forward<U>(element)))) {
+					this->tailIndex.store(newTailIndex, std::memory_order_release);
+					return true;
+				}
+			}
+
+			// Enqueue
+			new ((*this->tailBlock)[currentTailIndex]) T(std::forward<U>(element));
+
+			this->tailIndex.store(newTailIndex, std::memory_order_release);
+			return true;
+		}
+
+		template<typename U>
+		bool dequeue(U& element)
+		{
+			auto tail = this->tailIndex.load(std::memory_order_relaxed);
+			auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed);
+			if (details::circular_less_than<index_t>(this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit, tail)) {
+				// Might be something to dequeue, let's give it a try
+
+				// Note that this if is purely for performance purposes in the common case when the queue is
+				// empty and the values are eventually consistent -- we may enter here spuriously.
+
+				// Note that whatever the values of overcommit and tail are, they are not going to change (unless we
+				// change them) and must be the same value at this point (inside the if) as when the if condition was
+				// evaluated.
+
+				// We insert an acquire fence here to synchronize-with the release upon incrementing dequeueOvercommit below.
+				// This ensures that whatever the value we got loaded into overcommit, the load of dequeueOptisticCount in
+				// the fetch_add below will result in a value at least as recent as that (and therefore at least as large).
+				// Note that I believe a compiler (signal) fence here would be sufficient due to the nature of fetch_add (all
+				// read-modify-write operations are guaranteed to work on the latest value in the modification order), but
+				// unfortunately that can't be shown to be correct using only the C++11 standard.
+				// See http://stackoverflow.com/questions/18223161/what-are-the-c11-memory-ordering-guarantees-in-this-corner-case
+				std::atomic_thread_fence(std::memory_order_acquire);
+
+				// Increment optimistic counter, then check if it went over the boundary
+				auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed);
+
+				// Note that since dequeueOvercommit must be <= dequeueOptimisticCount (because dequeueOvercommit is only ever
+				// incremented after dequeueOptimisticCount -- this is enforced in the `else` block below), and since we now
+				// have a version of dequeueOptimisticCount that is at least as recent as overcommit (due to the release upon
+				// incrementing dequeueOvercommit and the acquire above that synchronizes with it), overcommit <= myDequeueCount.
+				// However, we can't assert this since both dequeueOptimisticCount and dequeueOvercommit may (independently)
+				// overflow; in such a case, though, the logic still holds since the difference between the two is maintained.
+
+				// Note that we reload tail here in case it changed; it will be the same value as before or greater, since
+				// this load is sequenced after (happens after) the earlier load above. This is supported by read-read
+				// coherency (as defined in the standard), explained here: http://en.cppreference.com/w/cpp/atomic/memory_order
+				tail = this->tailIndex.load(std::memory_order_acquire);
+				if ((details::likely)(details::circular_less_than<index_t>(myDequeueCount - overcommit, tail))) {
+					// Guaranteed to be at least one element to dequeue!
+
+					// Get the index. Note that since there's guaranteed to be at least one element, this
+					// will never exceed tail. We need to do an acquire-release fence here since it's possible
+					// that whatever condition got us to this point was for an earlier enqueued element (that
+					// we already see the memory effects for), but that by the time we increment somebody else
+					// has incremented it, and we need to see the memory effects for *that* element, which is
+					// in such a case is necessarily visible on the thread that incremented it in the first
+					// place with the more current condition (they must have acquired a tail that is at least
+					// as recent).
+					auto index = this->headIndex.fetch_add(1, std::memory_order_acq_rel);
+
+
+					// Determine which block the element is in
+
+					auto localBlockIndex = blockIndex.load(std::memory_order_acquire);
+					auto localBlockIndexHead = localBlockIndex->front.load(std::memory_order_acquire);
+
+					// We need to be careful here about subtracting and dividing because of index wrap-around.
+					// When an index wraps, we need to preserve the sign of the offset when dividing it by the
+					// block size (in order to get a correct signed block count offset in all cases):
+					auto headBase = localBlockIndex->entries[localBlockIndexHead].base;
+					auto blockBaseIndex = index & ~static_cast<index_t>(BLOCK_SIZE - 1);
+					auto offset = static_cast<size_t>(static_cast<typename std::make_signed<index_t>::type>(blockBaseIndex - headBase) / static_cast<typename std::make_signed<index_t>::type>(BLOCK_SIZE));
+					auto block = localBlockIndex->entries[(localBlockIndexHead + offset) & (localBlockIndex->size - 1)].block;
+
+					// Dequeue
+					auto& el = *((*block)[index]);
+					if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, element = std::move(el))) {
+						// Make sure the element is still fully dequeued and destroyed even if the assignment
+						// throws
+						struct Guard {
+							Block* block;
+							index_t index;
+
+							~Guard()
+							{
+								(*block)[index]->~T();
+								block->ConcurrentQueue::Block::template set_empty<explicit_context>(index);
+							}
+						} guard = { block, index };
+
+						element = std::move(el); // NOLINT
+					}
+					else {
+						element = std::move(el); // NOLINT
+						el.~T(); // NOLINT
+						block->ConcurrentQueue::Block::template set_empty<explicit_context>(index);
+					}
+
+					return true;
+				}
+				else {
+					// Wasn't anything to dequeue after all; make the effective dequeue count eventually consistent
+					this->dequeueOvercommit.fetch_add(1, std::memory_order_release);		// Release so that the fetch_add on dequeueOptimisticCount is guaranteed to happen before this write
+				}
+			}
+
+			return false;
+		}
+
+		template<AllocationMode allocMode, typename It>
+		bool MOODYCAMEL_NO_TSAN enqueue_bulk(It itemFirst, size_t count)
+		{
+			// First, we need to make sure we have enough room to enqueue all of the elements;
+			// this means pre-allocating blocks and putting them in the block index (but only if
+			// all the allocations succeeded).
+			index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed);
+			auto startBlock = this->tailBlock;
+			auto originalBlockIndexFront = pr_blockIndexFront;
+			auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed;
+
+			Block* firstAllocatedBlock = nullptr;
+
+			// Figure out how many blocks we'll need to allocate, and do so
+			size_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1)) - ((startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1));
+			index_t currentTailIndex = (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1);
+			if (blockBaseDiff > 0) {
+				// Allocate as many blocks as possible from ahead
+				while (blockBaseDiff > 0 && this->tailBlock != nullptr && this->tailBlock->next != firstAllocatedBlock && this->tailBlock->next->ConcurrentQueue::Block::template is_empty<explicit_context>()) {
+					blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE);
+					currentTailIndex += static_cast<index_t>(BLOCK_SIZE);
+
+					this->tailBlock = this->tailBlock->next;
+					firstAllocatedBlock = firstAllocatedBlock == nullptr ? this->tailBlock : firstAllocatedBlock;
+
+					auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront];
+					entry.base = currentTailIndex;
+					entry.block = this->tailBlock;
+					pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1);
+				}
+
+				// Now allocate as many blocks as necessary from the block pool
+				while (blockBaseDiff > 0) {
+					blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE);
+					currentTailIndex += static_cast<index_t>(BLOCK_SIZE);
+
+					auto head = this->headIndex.load(std::memory_order_relaxed);
+					assert(!details::circular_less_than<index_t>(currentTailIndex, head));
+					bool full = !details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head));
+					if (pr_blockIndexRaw == nullptr || pr_blockIndexSlotsUsed == pr_blockIndexSize || full) {
+						MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) {
+							// Failed to allocate, undo changes (but keep injected blocks)
+							pr_blockIndexFront = originalBlockIndexFront;
+							pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;
+							this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock;
+							return false;
+						}
+						else if (full || !new_block_index(originalBlockIndexSlotsUsed)) {
+							// Failed to allocate, undo changes (but keep injected blocks)
+							pr_blockIndexFront = originalBlockIndexFront;
+							pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;
+							this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock;
+							return false;
+						}
+
+						// pr_blockIndexFront is updated inside new_block_index, so we need to
+						// update our fallback value too (since we keep the new index even if we
+						// later fail)
+						originalBlockIndexFront = originalBlockIndexSlotsUsed;
+					}
+
+					// Insert a new block in the circular linked list
+					auto newBlock = this->parent->ConcurrentQueue::template requisition_block<allocMode>();
+					if (newBlock == nullptr) {
+						pr_blockIndexFront = originalBlockIndexFront;
+						pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;
+						this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock;
+						return false;
+					}
+
+#ifdef MCDBGQ_TRACKMEM
+					newBlock->owner = this;
+#endif
+					newBlock->ConcurrentQueue::Block::template set_all_empty<explicit_context>();
+					if (this->tailBlock == nullptr) {
+						newBlock->next = newBlock;
+					}
+					else {
+						newBlock->next = this->tailBlock->next;
+						this->tailBlock->next = newBlock;
+					}
+					this->tailBlock = newBlock;
+					firstAllocatedBlock = firstAllocatedBlock == nullptr ? this->tailBlock : firstAllocatedBlock;
+
+					++pr_blockIndexSlotsUsed;
+
+					auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront];
+					entry.base = currentTailIndex;
+					entry.block = this->tailBlock;
+					pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1);
+				}
+
+				// Excellent, all allocations succeeded. Reset each block's emptiness before we fill them up, and
+				// publish the new block index front
+				auto block = firstAllocatedBlock;
+				while (true) {
+					block->ConcurrentQueue::Block::template reset_empty<explicit_context>();
+					if (block == this->tailBlock) {
+						break;
+					}
+					block = block->next;
+				}
+
+				MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))) {
+					blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release);
+				}
+			}
+
+			// Enqueue, one block at a time
+			index_t newTailIndex = startTailIndex + static_cast<index_t>(count);
+			currentTailIndex = startTailIndex;
+			auto endBlock = this->tailBlock;
+			this->tailBlock = startBlock;
+			assert((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr || count == 0);
+			if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 && firstAllocatedBlock != nullptr) {
+				this->tailBlock = firstAllocatedBlock;
+			}
+			while (true) {
+				index_t stopIndex = (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
+				if (details::circular_less_than<index_t>(newTailIndex, stopIndex)) {
+					stopIndex = newTailIndex;
+				}
+				MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))) {
+					while (currentTailIndex != stopIndex) {
+						new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++);
+					}
+				}
+				else {
+					MOODYCAMEL_TRY {
+						while (currentTailIndex != stopIndex) {
+							// Must use copy constructor even if move constructor is available
+							// because we may have to revert if there's an exception.
+							// Sorry about the horrible templated next line, but it was the only way
+							// to disable moving *at compile time*, which is important because a type
+							// may only define a (noexcept) move constructor, and so calls to the
+							// cctor will not compile, even if they are in an if branch that will never
+							// be executed
+							new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if<!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst));
+							++currentTailIndex;
+							++itemFirst;
+						}
+					}
+					MOODYCAMEL_CATCH (...) {
+						// Oh dear, an exception's been thrown -- destroy the elements that
+						// were enqueued so far and revert the entire bulk operation (we'll keep
+						// any allocated blocks in our linked list for later, though).
+						auto constructedStopIndex = currentTailIndex;
+						auto lastBlockEnqueued = this->tailBlock;
+
+						pr_blockIndexFront = originalBlockIndexFront;
+						pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;
+						this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock;
+
+						if (!details::is_trivially_destructible<T>::value) {
+							auto block = startBlock;
+							if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0) {
+								block = firstAllocatedBlock;
+							}
+							currentTailIndex = startTailIndex;
+							while (true) {
+								stopIndex = (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
+								if (details::circular_less_than<index_t>(constructedStopIndex, stopIndex)) {
+									stopIndex = constructedStopIndex;
+								}
+								while (currentTailIndex != stopIndex) {
+									(*block)[currentTailIndex++]->~T();
+								}
+								if (block == lastBlockEnqueued) {
+									break;
+								}
+								block = block->next;
+							}
+						}
+						MOODYCAMEL_RETHROW;
+					}
+				}
+
+				if (this->tailBlock == endBlock) {
+					assert(currentTailIndex == newTailIndex);
+					break;
+				}
+				this->tailBlock = this->tailBlock->next;
+			}
+
+			MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))) {
+				if (firstAllocatedBlock != nullptr)
+					blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release);
+			}
+
+			this->tailIndex.store(newTailIndex, std::memory_order_release);
+			return true;
+		}
+
+		template<typename It>
+		size_t dequeue_bulk(It& itemFirst, size_t max)
+		{
+			auto tail = this->tailIndex.load(std::memory_order_relaxed);
+			auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed);
+			auto desiredCount = static_cast<size_t>(tail - (this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit));
+			if (details::circular_less_than<size_t>(0, desiredCount)) {
+				desiredCount = desiredCount < max ? desiredCount : max;
+				std::atomic_thread_fence(std::memory_order_acquire);
+
+				auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed);
+
+				tail = this->tailIndex.load(std::memory_order_acquire);
+				auto actualCount = static_cast<size_t>(tail - (myDequeueCount - overcommit));
+				if (details::circular_less_than<size_t>(0, actualCount)) {
+					actualCount = desiredCount < actualCount ? desiredCount : actualCount;
+					if (actualCount < desiredCount) {
+						this->dequeueOvercommit.fetch_add(desiredCount - actualCount, std::memory_order_release);
+					}
+
+					// Get the first index. Note that since there's guaranteed to be at least actualCount elements, this
+					// will never exceed tail.
+					auto firstIndex = this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel);
+
+					// Determine which block the first element is in
+					auto localBlockIndex = blockIndex.load(std::memory_order_acquire);
+					auto localBlockIndexHead = localBlockIndex->front.load(std::memory_order_acquire);
+
+					auto headBase = localBlockIndex->entries[localBlockIndexHead].base;
+					auto firstBlockBaseIndex = firstIndex & ~static_cast<index_t>(BLOCK_SIZE - 1);
+					auto offset = static_cast<size_t>(static_cast<typename std::make_signed<index_t>::type>(firstBlockBaseIndex - headBase) / static_cast<typename std::make_signed<index_t>::type>(BLOCK_SIZE));
+					auto indexIndex = (localBlockIndexHead + offset) & (localBlockIndex->size - 1);
+
+					// Iterate the blocks and dequeue
+					auto index = firstIndex;
+					do {
+						auto firstIndexInBlock = index;
+						index_t endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
+						endIndex = details::circular_less_than<index_t>(firstIndex + static_cast<index_t>(actualCount), endIndex) ? firstIndex + static_cast<index_t>(actualCount) : endIndex;
+						auto block = localBlockIndex->entries[indexIndex].block;
+						if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, details::deref_noexcept(itemFirst) = std::move((*(*block)[index])))) {
+							while (index != endIndex) {
+								auto& el = *((*block)[index]);
+								*itemFirst++ = std::move(el);
+								el.~T();
+								++index;
+							}
+						}
+						else {
+							MOODYCAMEL_TRY {
+								while (index != endIndex) {
+									auto& el = *((*block)[index]);
+									*itemFirst = std::move(el);
+									++itemFirst;
+									el.~T();
+									++index;
+								}
+							}
+							MOODYCAMEL_CATCH (...) {
+								// It's too late to revert the dequeue, but we can make sure that all
+								// the dequeued objects are properly destroyed and the block index
+								// (and empty count) are properly updated before we propagate the exception
+								do {
+									block = localBlockIndex->entries[indexIndex].block;
+									while (index != endIndex) {
+										(*block)[index++]->~T();
+									}
+									block->ConcurrentQueue::Block::template set_many_empty<explicit_context>(firstIndexInBlock, static_cast<size_t>(endIndex - firstIndexInBlock));
+									indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1);
+
+									firstIndexInBlock = index;
+									endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
+									endIndex = details::circular_less_than<index_t>(firstIndex + static_cast<index_t>(actualCount), endIndex) ? firstIndex + static_cast<index_t>(actualCount) : endIndex;
+								} while (index != firstIndex + actualCount);
+
+								MOODYCAMEL_RETHROW;
+							}
+						}
+						block->ConcurrentQueue::Block::template set_many_empty<explicit_context>(firstIndexInBlock, static_cast<size_t>(endIndex - firstIndexInBlock));
+						indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1);
+					} while (index != firstIndex + actualCount);
+
+					return actualCount;
+				}
+				else {
+					// Wasn't anything to dequeue after all; make the effective dequeue count eventually consistent
+					this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release);
+				}
+			}
+
+			return 0;
+		}
+
+	private:
+		struct BlockIndexEntry
+		{
+			index_t base;
+			Block* block;
+		};
+
+		struct BlockIndexHeader
+		{
+			size_t size;
+			std::atomic<size_t> front;		// Current slot (not next, like pr_blockIndexFront)
+			BlockIndexEntry* entries;
+			void* prev;
+		};
+
+
+		bool new_block_index(size_t numberOfFilledSlotsToExpose)
+		{
+			auto prevBlockSizeMask = pr_blockIndexSize - 1;
+
+			// Create the new block
+			pr_blockIndexSize <<= 1;
+			auto newRawPtr = static_cast<char*>((Traits::malloc)(sizeof(BlockIndexHeader) + std::alignment_of<BlockIndexEntry>::value - 1 + sizeof(BlockIndexEntry) * pr_blockIndexSize));
+			if (newRawPtr == nullptr) {
+				pr_blockIndexSize >>= 1;		// Reset to allow graceful retry
+				return false;
+			}
+
+			auto newBlockIndexEntries = reinterpret_cast<BlockIndexEntry*>(details::align_for<BlockIndexEntry>(newRawPtr + sizeof(BlockIndexHeader)));
+
+			// Copy in all the old indices, if any
+			size_t j = 0;
+			if (pr_blockIndexSlotsUsed != 0) {
+				auto i = (pr_blockIndexFront - pr_blockIndexSlotsUsed) & prevBlockSizeMask;
+				do {
+					newBlockIndexEntries[j++] = pr_blockIndexEntries[i];
+					i = (i + 1) & prevBlockSizeMask;
+				} while (i != pr_blockIndexFront);
+			}
+
+			// Update everything
+			auto header = new (newRawPtr) BlockIndexHeader;
+			header->size = pr_blockIndexSize;
+			header->front.store(numberOfFilledSlotsToExpose - 1, std::memory_order_relaxed);
+			header->entries = newBlockIndexEntries;
+			header->prev = pr_blockIndexRaw;		// we link the new block to the old one so we can free it later
+
+			pr_blockIndexFront = j;
+			pr_blockIndexEntries = newBlockIndexEntries;
+			pr_blockIndexRaw = newRawPtr;
+			blockIndex.store(header, std::memory_order_release);
+
+			return true;
+		}
+
+	private:
+		std::atomic<BlockIndexHeader*> blockIndex;
+
+		// To be used by producer only -- consumer must use the ones in referenced by blockIndex
+		size_t pr_blockIndexSlotsUsed;
+		size_t pr_blockIndexSize;
+		size_t pr_blockIndexFront;		// Next slot (not current)
+		BlockIndexEntry* pr_blockIndexEntries;
+		void* pr_blockIndexRaw;
+
+#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
+	public:
+		ExplicitProducer* nextExplicitProducer;
+	private:
+#endif
+
+#ifdef MCDBGQ_TRACKMEM
+		friend struct MemStats;
+#endif
+	};
+
+
+	//////////////////////////////////
+	// Implicit queue
+	//////////////////////////////////
+
+	struct ImplicitProducer : public ProducerBase
+	{
+		ImplicitProducer(ConcurrentQueue* parent_) :
+			ProducerBase(parent_, false),
+			nextBlockIndexCapacity(IMPLICIT_INITIAL_INDEX_SIZE),
+			blockIndex(nullptr)
+		{
+			new_block_index();
+		}
+
+		~ImplicitProducer()
+		{
+			// Note that since we're in the destructor we can assume that all enqueue/dequeue operations
+			// completed already; this means that all undequeued elements are placed contiguously across
+			// contiguous blocks, and that only the first and last remaining blocks can be only partially
+			// empty (all other remaining blocks must be completely full).
+
+#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
+			// Unregister ourselves for thread termination notification
+			if (!this->inactive.load(std::memory_order_relaxed)) {
+				details::ThreadExitNotifier::unsubscribe(&threadExitListener);
+			}
+#endif
+
+			// Destroy all remaining elements!
+			auto tail = this->tailIndex.load(std::memory_order_relaxed);
+			auto index = this->headIndex.load(std::memory_order_relaxed);
+			Block* block = nullptr;
+			assert(index == tail || details::circular_less_than(index, tail));
+			bool forceFreeLastBlock = index != tail;		// If we enter the loop, then the last (tail) block will not be freed
+			while (index != tail) {
+				if ((index & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 || block == nullptr) {
+					if (block != nullptr) {
+						// Free the old block
+						this->parent->add_block_to_free_list(block);
+					}
+
+					block = get_block_index_entry_for_index(index)->value.load(std::memory_order_relaxed);
+				}
+
+				((*block)[index])->~T();
+				++index;
+			}
+			// Even if the queue is empty, there's still one block that's not on the free list
+			// (unless the head index reached the end of it, in which case the tail will be poised
+			// to create a new block).
+			if (this->tailBlock != nullptr && (forceFreeLastBlock || (tail & static_cast<index_t>(BLOCK_SIZE - 1)) != 0)) {
+				this->parent->add_block_to_free_list(this->tailBlock);
+			}
+
+			// Destroy block index
+			auto localBlockIndex = blockIndex.load(std::memory_order_relaxed);
+			if (localBlockIndex != nullptr) {
+				for (size_t i = 0; i != localBlockIndex->capacity; ++i) {
+					localBlockIndex->index[i]->~BlockIndexEntry();
+				}
+				do {
+					auto prev = localBlockIndex->prev;
+					localBlockIndex->~BlockIndexHeader();
+					(Traits::free)(localBlockIndex);
+					localBlockIndex = prev;
+				} while (localBlockIndex != nullptr);
+			}
+		}
+
+		template<AllocationMode allocMode, typename U>
+		inline bool enqueue(U&& element)
+		{
+			index_t currentTailIndex = this->tailIndex.load(std::memory_order_relaxed);
+			index_t newTailIndex = 1 + currentTailIndex;
+			if ((currentTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0) {
+				// We reached the end of a block, start a new one
+				auto head = this->headIndex.load(std::memory_order_relaxed);
+				assert(!details::circular_less_than<index_t>(currentTailIndex, head));
+				if (!details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) {
+					return false;
+				}
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
+				debug::DebugLock lock(mutex);
+#endif
+				// Find out where we'll be inserting this block in the block index
+				BlockIndexEntry* idxEntry;
+				if (!insert_block_index_entry<allocMode>(idxEntry, currentTailIndex)) {
+					return false;
+				}
+
+				// Get ahold of a new block
+				auto newBlock = this->parent->ConcurrentQueue::template requisition_block<allocMode>();
+				if (newBlock == nullptr) {
+					rewind_block_index_tail();
+					idxEntry->value.store(nullptr, std::memory_order_relaxed);
+					return false;
+				}
+#ifdef MCDBGQ_TRACKMEM
+				newBlock->owner = this;
+#endif
+				newBlock->ConcurrentQueue::Block::template reset_empty<implicit_context>();
+
+				MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast<T*>(nullptr)) T(std::forward<U>(element)))) {
+					// May throw, try to insert now before we publish the fact that we have this new block
+					MOODYCAMEL_TRY {
+						new ((*newBlock)[currentTailIndex]) T(std::forward<U>(element));
+					}
+					MOODYCAMEL_CATCH (...) {
+						rewind_block_index_tail();
+						idxEntry->value.store(nullptr, std::memory_order_relaxed);
+						this->parent->add_block_to_free_list(newBlock);
+						MOODYCAMEL_RETHROW;
+					}
+				}
+
+				// Insert the new block into the index
+				idxEntry->value.store(newBlock, std::memory_order_relaxed);
+
+				this->tailBlock = newBlock;
+
+				MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast<T*>(nullptr)) T(std::forward<U>(element)))) {
+					this->tailIndex.store(newTailIndex, std::memory_order_release);
+					return true;
+				}
+			}
+
+			// Enqueue
+			new ((*this->tailBlock)[currentTailIndex]) T(std::forward<U>(element));
+
+			this->tailIndex.store(newTailIndex, std::memory_order_release);
+			return true;
+		}
+
+		template<typename U>
+		bool dequeue(U& element)
+		{
+			// See ExplicitProducer::dequeue for rationale and explanation
+			index_t tail = this->tailIndex.load(std::memory_order_relaxed);
+			index_t overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed);
+			if (details::circular_less_than<index_t>(this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit, tail)) {
+				std::atomic_thread_fence(std::memory_order_acquire);
+
+				index_t myDequeueCount = this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed);
+				tail = this->tailIndex.load(std::memory_order_acquire);
+				if ((details::likely)(details::circular_less_than<index_t>(myDequeueCount - overcommit, tail))) {
+					index_t index = this->headIndex.fetch_add(1, std::memory_order_acq_rel);
+
+					// Determine which block the element is in
+					auto entry = get_block_index_entry_for_index(index);
+
+					// Dequeue
+					auto block = entry->value.load(std::memory_order_relaxed);
+					auto& el = *((*block)[index]);
+
+					if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, element = std::move(el))) {
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
+						// Note: Acquiring the mutex with every dequeue instead of only when a block
+						// is released is very sub-optimal, but it is, after all, purely debug code.
+						debug::DebugLock lock(producer->mutex);
+#endif
+						struct Guard {
+							Block* block;
+							index_t index;
+							BlockIndexEntry* entry;
+							ConcurrentQueue* parent;
+
+							~Guard()
+							{
+								(*block)[index]->~T();
+								if (block->ConcurrentQueue::Block::template set_empty<implicit_context>(index)) {
+									entry->value.store(nullptr, std::memory_order_relaxed);
+									parent->add_block_to_free_list(block);
+								}
+							}
+						} guard = { block, index, entry, this->parent };
+
+						element = std::move(el); // NOLINT
+					}
+					else {
+						element = std::move(el); // NOLINT
+						el.~T(); // NOLINT
+
+						if (block->ConcurrentQueue::Block::template set_empty<implicit_context>(index)) {
+							{
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
+								debug::DebugLock lock(mutex);
+#endif
+								// Add the block back into the global free pool (and remove from block index)
+								entry->value.store(nullptr, std::memory_order_relaxed);
+							}
+							this->parent->add_block_to_free_list(block);		// releases the above store
+						}
+					}
+
+					return true;
+				}
+				else {
+					this->dequeueOvercommit.fetch_add(1, std::memory_order_release);
+				}
+			}
+
+			return false;
+		}
+
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable: 4706)  // assignment within conditional expression
+#endif
+		template<AllocationMode allocMode, typename It>
+		bool enqueue_bulk(It itemFirst, size_t count)
+		{
+			// First, we need to make sure we have enough room to enqueue all of the elements;
+			// this means pre-allocating blocks and putting them in the block index (but only if
+			// all the allocations succeeded).
+
+			// Note that the tailBlock we start off with may not be owned by us any more;
+			// this happens if it was filled up exactly to the top (setting tailIndex to
+			// the first index of the next block which is not yet allocated), then dequeued
+			// completely (putting it on the free list) before we enqueue again.
+
+			index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed);
+			auto startBlock = this->tailBlock;
+			Block* firstAllocatedBlock = nullptr;
+			auto endBlock = this->tailBlock;
+
+			// Figure out how many blocks we'll need to allocate, and do so
+			size_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1)) - ((startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1));
+			index_t currentTailIndex = (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1);
+			if (blockBaseDiff > 0) {
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
+				debug::DebugLock lock(mutex);
+#endif
+				do {
+					blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE);
+					currentTailIndex += static_cast<index_t>(BLOCK_SIZE);
+
+					// Find out where we'll be inserting this block in the block index
+					BlockIndexEntry* idxEntry = nullptr;  // initialization here unnecessary but compiler can't always tell
+					Block* newBlock;
+					bool indexInserted = false;
+					auto head = this->headIndex.load(std::memory_order_relaxed);
+					assert(!details::circular_less_than<index_t>(currentTailIndex, head));
+					bool full = !details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head));
+
+					if (full || !(indexInserted = insert_block_index_entry<allocMode>(idxEntry, currentTailIndex)) || (newBlock = this->parent->ConcurrentQueue::template requisition_block<allocMode>()) == nullptr) {
+						// Index allocation or block allocation failed; revert any other allocations
+						// and index insertions done so far for this operation
+						if (indexInserted) {
+							rewind_block_index_tail();
+							idxEntry->value.store(nullptr, std::memory_order_relaxed);
+						}
+						currentTailIndex = (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1);
+						for (auto block = firstAllocatedBlock; block != nullptr; block = block->next) {
+							currentTailIndex += static_cast<index_t>(BLOCK_SIZE);
+							idxEntry = get_block_index_entry_for_index(currentTailIndex);
+							idxEntry->value.store(nullptr, std::memory_order_relaxed);
+							rewind_block_index_tail();
+						}
+						this->parent->add_blocks_to_free_list(firstAllocatedBlock);
+						this->tailBlock = startBlock;
+
+						return false;
+					}
+
+#ifdef MCDBGQ_TRACKMEM
+					newBlock->owner = this;
+#endif
+					newBlock->ConcurrentQueue::Block::template reset_empty<implicit_context>();
+					newBlock->next = nullptr;
+
+					// Insert the new block into the index
+					idxEntry->value.store(newBlock, std::memory_order_relaxed);
+
+					// Store the chain of blocks so that we can undo if later allocations fail,
+					// and so that we can find the blocks when we do the actual enqueueing
+					if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr) {
+						assert(this->tailBlock != nullptr);
+						this->tailBlock->next = newBlock;
+					}
+					this->tailBlock = newBlock;
+					endBlock = newBlock;
+					firstAllocatedBlock = firstAllocatedBlock == nullptr ? newBlock : firstAllocatedBlock;
+				} while (blockBaseDiff > 0);
+			}
+
+			// Enqueue, one block at a time
+			index_t newTailIndex = startTailIndex + static_cast<index_t>(count);
+			currentTailIndex = startTailIndex;
+			this->tailBlock = startBlock;
+			assert((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr || count == 0);
+			if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 && firstAllocatedBlock != nullptr) {
+				this->tailBlock = firstAllocatedBlock;
+			}
+			while (true) {
+				index_t stopIndex = (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
+				if (details::circular_less_than<index_t>(newTailIndex, stopIndex)) {
+					stopIndex = newTailIndex;
+				}
+				MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))) {
+					while (currentTailIndex != stopIndex) {
+						new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++);
+					}
+				}
+				else {
+					MOODYCAMEL_TRY {
+						while (currentTailIndex != stopIndex) {
+							new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if<!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst));
+							++currentTailIndex;
+							++itemFirst;
+						}
+					}
+					MOODYCAMEL_CATCH (...) {
+						auto constructedStopIndex = currentTailIndex;
+						auto lastBlockEnqueued = this->tailBlock;
+
+						if (!details::is_trivially_destructible<T>::value) {
+							auto block = startBlock;
+							if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0) {
+								block = firstAllocatedBlock;
+							}
+							currentTailIndex = startTailIndex;
+							while (true) {
+								stopIndex = (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
+								if (details::circular_less_than<index_t>(constructedStopIndex, stopIndex)) {
+									stopIndex = constructedStopIndex;
+								}
+								while (currentTailIndex != stopIndex) {
+									(*block)[currentTailIndex++]->~T();
+								}
+								if (block == lastBlockEnqueued) {
+									break;
+								}
+								block = block->next;
+							}
+						}
+
+						currentTailIndex = (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1);
+						for (auto block = firstAllocatedBlock; block != nullptr; block = block->next) {
+							currentTailIndex += static_cast<index_t>(BLOCK_SIZE);
+							auto idxEntry = get_block_index_entry_for_index(currentTailIndex);
+							idxEntry->value.store(nullptr, std::memory_order_relaxed);
+							rewind_block_index_tail();
+						}
+						this->parent->add_blocks_to_free_list(firstAllocatedBlock);
+						this->tailBlock = startBlock;
+						MOODYCAMEL_RETHROW;
+					}
+				}
+
+				if (this->tailBlock == endBlock) {
+					assert(currentTailIndex == newTailIndex);
+					break;
+				}
+				this->tailBlock = this->tailBlock->next;
+			}
+			this->tailIndex.store(newTailIndex, std::memory_order_release);
+			return true;
+		}
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+		template<typename It>
+		size_t dequeue_bulk(It& itemFirst, size_t max)
+		{
+			auto tail = this->tailIndex.load(std::memory_order_relaxed);
+			auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed);
+			auto desiredCount = static_cast<size_t>(tail - (this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit));
+			if (details::circular_less_than<size_t>(0, desiredCount)) {
+				desiredCount = desiredCount < max ? desiredCount : max;
+				std::atomic_thread_fence(std::memory_order_acquire);
+
+				auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed);
+
+				tail = this->tailIndex.load(std::memory_order_acquire);
+				auto actualCount = static_cast<size_t>(tail - (myDequeueCount - overcommit));
+				if (details::circular_less_than<size_t>(0, actualCount)) {
+					actualCount = desiredCount < actualCount ? desiredCount : actualCount;
+					if (actualCount < desiredCount) {
+						this->dequeueOvercommit.fetch_add(desiredCount - actualCount, std::memory_order_release);
+					}
+
+					// Get the first index. Note that since there's guaranteed to be at least actualCount elements, this
+					// will never exceed tail.
+					auto firstIndex = this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel);
+
+					// Iterate the blocks and dequeue
+					auto index = firstIndex;
+					BlockIndexHeader* localBlockIndex;
+					auto indexIndex = get_block_index_index_for_index(index, localBlockIndex);
+					do {
+						auto blockStartIndex = index;
+						index_t endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
+						endIndex = details::circular_less_than<index_t>(firstIndex + static_cast<index_t>(actualCount), endIndex) ? firstIndex + static_cast<index_t>(actualCount) : endIndex;
+
+						auto entry = localBlockIndex->index[indexIndex];
+						auto block = entry->value.load(std::memory_order_relaxed);
+						if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, details::deref_noexcept(itemFirst) = std::move((*(*block)[index])))) {
+							while (index != endIndex) {
+								auto& el = *((*block)[index]);
+								*itemFirst++ = std::move(el);
+								el.~T();
+								++index;
+							}
+						}
+						else {
+							MOODYCAMEL_TRY {
+								while (index != endIndex) {
+									auto& el = *((*block)[index]);
+									*itemFirst = std::move(el);
+									++itemFirst;
+									el.~T();
+									++index;
+								}
+							}
+							MOODYCAMEL_CATCH (...) {
+								do {
+									entry = localBlockIndex->index[indexIndex];
+									block = entry->value.load(std::memory_order_relaxed);
+									while (index != endIndex) {
+										(*block)[index++]->~T();
+									}
+
+									if (block->ConcurrentQueue::Block::template set_many_empty<implicit_context>(blockStartIndex, static_cast<size_t>(endIndex - blockStartIndex))) {
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
+										debug::DebugLock lock(mutex);
+#endif
+										entry->value.store(nullptr, std::memory_order_relaxed);
+										this->parent->add_block_to_free_list(block);
+									}
+									indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1);
+
+									blockStartIndex = index;
+									endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
+									endIndex = details::circular_less_than<index_t>(firstIndex + static_cast<index_t>(actualCount), endIndex) ? firstIndex + static_cast<index_t>(actualCount) : endIndex;
+								} while (index != firstIndex + actualCount);
+
+								MOODYCAMEL_RETHROW;
+							}
+						}
+						if (block->ConcurrentQueue::Block::template set_many_empty<implicit_context>(blockStartIndex, static_cast<size_t>(endIndex - blockStartIndex))) {
+							{
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
+								debug::DebugLock lock(mutex);
+#endif
+								// Note that the set_many_empty above did a release, meaning that anybody who acquires the block
+								// we're about to free can use it safely since our writes (and reads!) will have happened-before then.
+								entry->value.store(nullptr, std::memory_order_relaxed);
+							}
+							this->parent->add_block_to_free_list(block);		// releases the above store
+						}
+						indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1);
+					} while (index != firstIndex + actualCount);
+
+					return actualCount;
+				}
+				else {
+					this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release);
+				}
+			}
+
+			return 0;
+		}
+
+	private:
+		// The block size must be > 1, so any number with the low bit set is an invalid block base index
+		static const index_t INVALID_BLOCK_BASE = 1;
+
+		struct BlockIndexEntry
+		{
+			std::atomic<index_t> key;
+			std::atomic<Block*> value;
+		};
+
+		struct BlockIndexHeader
+		{
+			size_t capacity;
+			std::atomic<size_t> tail;
+			BlockIndexEntry* entries;
+			BlockIndexEntry** index;
+			BlockIndexHeader* prev;
+		};
+
+		template<AllocationMode allocMode>
+		inline bool insert_block_index_entry(BlockIndexEntry*& idxEntry, index_t blockStartIndex)
+		{
+			auto localBlockIndex = blockIndex.load(std::memory_order_relaxed);		// We're the only writer thread, relaxed is OK
+			if (localBlockIndex == nullptr) {
+				return false;  // this can happen if new_block_index failed in the constructor
+			}
+			size_t newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1);
+			idxEntry = localBlockIndex->index[newTail];
+			if (idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE ||
+				idxEntry->value.load(std::memory_order_relaxed) == nullptr) {
+
+				idxEntry->key.store(blockStartIndex, std::memory_order_relaxed);
+				localBlockIndex->tail.store(newTail, std::memory_order_release);
+				return true;
+			}
+
+			// No room in the old block index, try to allocate another one!
+			MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) {
+				return false;
+			}
+			else if (!new_block_index()) {
+				return false;
+			}
+			else {
+				localBlockIndex = blockIndex.load(std::memory_order_relaxed);
+				newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1);
+				idxEntry = localBlockIndex->index[newTail];
+				assert(idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE);
+				idxEntry->key.store(blockStartIndex, std::memory_order_relaxed);
+				localBlockIndex->tail.store(newTail, std::memory_order_release);
+				return true;
+			}
+		}
+
+		inline void rewind_block_index_tail()
+		{
+			auto localBlockIndex = blockIndex.load(std::memory_order_relaxed);
+			localBlockIndex->tail.store((localBlockIndex->tail.load(std::memory_order_relaxed) - 1) & (localBlockIndex->capacity - 1), std::memory_order_relaxed);
+		}
+
+		inline BlockIndexEntry* get_block_index_entry_for_index(index_t index) const
+		{
+			BlockIndexHeader* localBlockIndex;
+			auto idx = get_block_index_index_for_index(index, localBlockIndex);
+			return localBlockIndex->index[idx];
+		}
+
+		inline size_t get_block_index_index_for_index(index_t index, BlockIndexHeader*& localBlockIndex) const
+		{
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
+			debug::DebugLock lock(mutex);
+#endif
+			index &= ~static_cast<index_t>(BLOCK_SIZE - 1);
+			localBlockIndex = blockIndex.load(std::memory_order_acquire);
+			auto tail = localBlockIndex->tail.load(std::memory_order_acquire);
+			auto tailBase = localBlockIndex->index[tail]->key.load(std::memory_order_relaxed);
+			assert(tailBase != INVALID_BLOCK_BASE);
+			// Note: Must use division instead of shift because the index may wrap around, causing a negative
+			// offset, whose negativity we want to preserve
+			auto offset = static_cast<size_t>(static_cast<typename std::make_signed<index_t>::type>(index - tailBase) / static_cast<typename std::make_signed<index_t>::type>(BLOCK_SIZE));
+			size_t idx = (tail + offset) & (localBlockIndex->capacity - 1);
+			assert(localBlockIndex->index[idx]->key.load(std::memory_order_relaxed) == index && localBlockIndex->index[idx]->value.load(std::memory_order_relaxed) != nullptr);
+			return idx;
+		}
+
+		bool new_block_index()
+		{
+			auto prev = blockIndex.load(std::memory_order_relaxed);
+			size_t prevCapacity = prev == nullptr ? 0 : prev->capacity;
+			auto entryCount = prev == nullptr ? nextBlockIndexCapacity : prevCapacity;
+			auto raw = static_cast<char*>((Traits::malloc)(
+				sizeof(BlockIndexHeader) +
+				std::alignment_of<BlockIndexEntry>::value - 1 + sizeof(BlockIndexEntry) * entryCount +
+				std::alignment_of<BlockIndexEntry*>::value - 1 + sizeof(BlockIndexEntry*) * nextBlockIndexCapacity));
+			if (raw == nullptr) {
+				return false;
+			}
+
+			auto header = new (raw) BlockIndexHeader;
+			auto entries = reinterpret_cast<BlockIndexEntry*>(details::align_for<BlockIndexEntry>(raw + sizeof(BlockIndexHeader)));
+			auto index = reinterpret_cast<BlockIndexEntry**>(details::align_for<BlockIndexEntry*>(reinterpret_cast<char*>(entries) + sizeof(BlockIndexEntry) * entryCount));
+			if (prev != nullptr) {
+				auto prevTail = prev->tail.load(std::memory_order_relaxed);
+				auto prevPos = prevTail;
+				size_t i = 0;
+				do {
+					prevPos = (prevPos + 1) & (prev->capacity - 1);
+					index[i++] = prev->index[prevPos];
+				} while (prevPos != prevTail);
+				assert(i == prevCapacity);
+			}
+			for (size_t i = 0; i != entryCount; ++i) {
+				new (entries + i) BlockIndexEntry;
+				entries[i].key.store(INVALID_BLOCK_BASE, std::memory_order_relaxed);
+				index[prevCapacity + i] = entries + i;
+			}
+			header->prev = prev;
+			header->entries = entries;
+			header->index = index;
+			header->capacity = nextBlockIndexCapacity;
+			header->tail.store((prevCapacity - 1) & (nextBlockIndexCapacity - 1), std::memory_order_relaxed);
+
+			blockIndex.store(header, std::memory_order_release);
+
+			nextBlockIndexCapacity <<= 1;
+
+			return true;
+		}
+
+	private:
+		size_t nextBlockIndexCapacity;
+		std::atomic<BlockIndexHeader*> blockIndex;
+
+#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
+	public:
+		details::ThreadExitListener threadExitListener;
+	private:
+#endif
+
+#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
+	public:
+		ImplicitProducer* nextImplicitProducer;
+	private:
+#endif
+
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
+		mutable debug::DebugMutex mutex;
+#endif
+#ifdef MCDBGQ_TRACKMEM
+		friend struct MemStats;
+#endif
+	};
+
+
+	//////////////////////////////////
+	// Block pool manipulation
+	//////////////////////////////////
+
+	void populate_initial_block_list(size_t blockCount)
+	{
+		initialBlockPoolSize = blockCount;
+		if (initialBlockPoolSize == 0) {
+			initialBlockPool = nullptr;
+			return;
+		}
+
+		initialBlockPool = create_array<Block>(blockCount);
+		if (initialBlockPool == nullptr) {
+			initialBlockPoolSize = 0;
+		}
+		for (size_t i = 0; i < initialBlockPoolSize; ++i) {
+			initialBlockPool[i].dynamicallyAllocated = false;
+		}
+	}
+
+	inline Block* try_get_block_from_initial_pool()
+	{
+		if (initialBlockPoolIndex.load(std::memory_order_relaxed) >= initialBlockPoolSize) {
+			return nullptr;
+		}
+
+		auto index = initialBlockPoolIndex.fetch_add(1, std::memory_order_relaxed);
+
+		return index < initialBlockPoolSize ? (initialBlockPool + index) : nullptr;
+	}
+
+	inline void add_block_to_free_list(Block* block)
+	{
+#ifdef MCDBGQ_TRACKMEM
+		block->owner = nullptr;
+#endif
+		if (!Traits::RECYCLE_ALLOCATED_BLOCKS && block->dynamicallyAllocated) {
+			destroy(block);
+		}
+		else {
+			freeList.add(block);
+		}
+	}
+
+	inline void add_blocks_to_free_list(Block* block)
+	{
+		while (block != nullptr) {
+			auto next = block->next;
+			add_block_to_free_list(block);
+			block = next;
+		}
+	}
+
+	inline Block* try_get_block_from_free_list()
+	{
+		return freeList.try_get();
+	}
+
+	// Gets a free block from one of the memory pools, or allocates a new one (if applicable)
+	template<AllocationMode canAlloc>
+	Block* requisition_block()
+	{
+		auto block = try_get_block_from_initial_pool();
+		if (block != nullptr) {
+			return block;
+		}
+
+		block = try_get_block_from_free_list();
+		if (block != nullptr) {
+			return block;
+		}
+
+		MOODYCAMEL_CONSTEXPR_IF (canAlloc == CanAlloc) {
+			return create<Block>();
+		}
+		else {
+			return nullptr;
+		}
+	}
+
+
+#ifdef MCDBGQ_TRACKMEM
+	public:
+		struct MemStats {
+			size_t allocatedBlocks;
+			size_t usedBlocks;
+			size_t freeBlocks;
+			size_t ownedBlocksExplicit;
+			size_t ownedBlocksImplicit;
+			size_t implicitProducers;
+			size_t explicitProducers;
+			size_t elementsEnqueued;
+			size_t blockClassBytes;
+			size_t queueClassBytes;
+			size_t implicitBlockIndexBytes;
+			size_t explicitBlockIndexBytes;
+
+			friend class ConcurrentQueue;
+
+		private:
+			static MemStats getFor(ConcurrentQueue* q)
+			{
+				MemStats stats = { 0 };
+
+				stats.elementsEnqueued = q->size_approx();
+
+				auto block = q->freeList.head_unsafe();
+				while (block != nullptr) {
+					++stats.allocatedBlocks;
+					++stats.freeBlocks;
+					block = block->freeListNext.load(std::memory_order_relaxed);
+				}
+
+				for (auto ptr = q->producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) {
+					bool implicit = dynamic_cast<ImplicitProducer*>(ptr) != nullptr;
+					stats.implicitProducers += implicit ? 1 : 0;
+					stats.explicitProducers += implicit ? 0 : 1;
+
+					if (implicit) {
+						auto prod = static_cast<ImplicitProducer*>(ptr);
+						stats.queueClassBytes += sizeof(ImplicitProducer);
+						auto head = prod->headIndex.load(std::memory_order_relaxed);
+						auto tail = prod->tailIndex.load(std::memory_order_relaxed);
+						auto hash = prod->blockIndex.load(std::memory_order_relaxed);
+						if (hash != nullptr) {
+							for (size_t i = 0; i != hash->capacity; ++i) {
+								if (hash->index[i]->key.load(std::memory_order_relaxed) != ImplicitProducer::INVALID_BLOCK_BASE && hash->index[i]->value.load(std::memory_order_relaxed) != nullptr) {
+									++stats.allocatedBlocks;
+									++stats.ownedBlocksImplicit;
+								}
+							}
+							stats.implicitBlockIndexBytes += hash->capacity * sizeof(typename ImplicitProducer::BlockIndexEntry);
+							for (; hash != nullptr; hash = hash->prev) {
+								stats.implicitBlockIndexBytes += sizeof(typename ImplicitProducer::BlockIndexHeader) + hash->capacity * sizeof(typename ImplicitProducer::BlockIndexEntry*);
+							}
+						}
+						for (; details::circular_less_than<index_t>(head, tail); head += BLOCK_SIZE) {
+							//auto block = prod->get_block_index_entry_for_index(head);
+							++stats.usedBlocks;
+						}
+					}
+					else {
+						auto prod = static_cast<ExplicitProducer*>(ptr);
+						stats.queueClassBytes += sizeof(ExplicitProducer);
+						auto tailBlock = prod->tailBlock;
+						bool wasNonEmpty = false;
+						if (tailBlock != nullptr) {
+							auto block = tailBlock;
+							do {
+								++stats.allocatedBlocks;
+								if (!block->ConcurrentQueue::Block::template is_empty<explicit_context>() || wasNonEmpty) {
+									++stats.usedBlocks;
+									wasNonEmpty = wasNonEmpty || block != tailBlock;
+								}
+								++stats.ownedBlocksExplicit;
+								block = block->next;
+							} while (block != tailBlock);
+						}
+						auto index = prod->blockIndex.load(std::memory_order_relaxed);
+						while (index != nullptr) {
+							stats.explicitBlockIndexBytes += sizeof(typename ExplicitProducer::BlockIndexHeader) + index->size * sizeof(typename ExplicitProducer::BlockIndexEntry);
+							index = static_cast<typename ExplicitProducer::BlockIndexHeader*>(index->prev);
+						}
+					}
+				}
+
+				auto freeOnInitialPool = q->initialBlockPoolIndex.load(std::memory_order_relaxed) >= q->initialBlockPoolSize ? 0 : q->initialBlockPoolSize - q->initialBlockPoolIndex.load(std::memory_order_relaxed);
+				stats.allocatedBlocks += freeOnInitialPool;
+				stats.freeBlocks += freeOnInitialPool;
+
+				stats.blockClassBytes = sizeof(Block) * stats.allocatedBlocks;
+				stats.queueClassBytes += sizeof(ConcurrentQueue);
+
+				return stats;
+			}
+		};
+
+		// For debugging only. Not thread-safe.
+		MemStats getMemStats()
+		{
+			return MemStats::getFor(this);
+		}
+	private:
+		friend struct MemStats;
+#endif
+
+
+	//////////////////////////////////
+	// Producer list manipulation
+	//////////////////////////////////
+
+	ProducerBase* recycle_or_create_producer(bool isExplicit)
+	{
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
+		debug::DebugLock lock(implicitProdMutex);
+#endif
+		// Try to re-use one first
+		for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) {
+			if (ptr->inactive.load(std::memory_order_relaxed) && ptr->isExplicit == isExplicit) {
+				bool expected = true;
+				if (ptr->inactive.compare_exchange_strong(expected, /* desired */ false, std::memory_order_acquire, std::memory_order_relaxed)) {
+					// We caught one! It's been marked as activated, the caller can have it
+					return ptr;
+				}
+			}
+		}
+
+		return add_producer(isExplicit ? static_cast<ProducerBase*>(create<ExplicitProducer>(this)) : create<ImplicitProducer>(this));
+	}
+
+	ProducerBase* add_producer(ProducerBase* producer)
+	{
+		// Handle failed memory allocation
+		if (producer == nullptr) {
+			return nullptr;
+		}
+
+		producerCount.fetch_add(1, std::memory_order_relaxed);
+
+		// Add it to the lock-free list
+		auto prevTail = producerListTail.load(std::memory_order_relaxed);
+		do {
+			producer->next = prevTail;
+		} while (!producerListTail.compare_exchange_weak(prevTail, producer, std::memory_order_release, std::memory_order_relaxed));
+
+#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
+		if (producer->isExplicit) {
+			auto prevTailExplicit = explicitProducers.load(std::memory_order_relaxed);
+			do {
+				static_cast<ExplicitProducer*>(producer)->nextExplicitProducer = prevTailExplicit;
+			} while (!explicitProducers.compare_exchange_weak(prevTailExplicit, static_cast<ExplicitProducer*>(producer), std::memory_order_release, std::memory_order_relaxed));
+		}
+		else {
+			auto prevTailImplicit = implicitProducers.load(std::memory_order_relaxed);
+			do {
+				static_cast<ImplicitProducer*>(producer)->nextImplicitProducer = prevTailImplicit;
+			} while (!implicitProducers.compare_exchange_weak(prevTailImplicit, static_cast<ImplicitProducer*>(producer), std::memory_order_release, std::memory_order_relaxed));
+		}
+#endif
+
+		return producer;
+	}
+
+	void reown_producers()
+	{
+		// After another instance is moved-into/swapped-with this one, all the
+		// producers we stole still think their parents are the other queue.
+		// So fix them up!
+		for (auto ptr = producerListTail.load(std::memory_order_relaxed); ptr != nullptr; ptr = ptr->next_prod()) {
+			ptr->parent = this;
+		}
+	}
+
+
+	//////////////////////////////////
+	// Implicit producer hash
+	//////////////////////////////////
+
+	struct ImplicitProducerKVP
+	{
+		std::atomic<details::thread_id_t> key;
+		ImplicitProducer* value;		// No need for atomicity since it's only read by the thread that sets it in the first place
+
+		ImplicitProducerKVP() : value(nullptr) { }
+
+		ImplicitProducerKVP(ImplicitProducerKVP&& other) MOODYCAMEL_NOEXCEPT
+		{
+			key.store(other.key.load(std::memory_order_relaxed), std::memory_order_relaxed);
+			value = other.value;
+		}
+
+		inline ImplicitProducerKVP& operator=(ImplicitProducerKVP&& other) MOODYCAMEL_NOEXCEPT
+		{
+			swap(other);
+			return *this;
+		}
+
+		inline void swap(ImplicitProducerKVP& other) MOODYCAMEL_NOEXCEPT
+		{
+			if (this != &other) {
+				details::swap_relaxed(key, other.key);
+				std::swap(value, other.value);
+			}
+		}
+	};
+
+	template<typename XT, typename XTraits>
+	friend void moodycamel::swap(typename ConcurrentQueue<XT, XTraits>::ImplicitProducerKVP&, typename ConcurrentQueue<XT, XTraits>::ImplicitProducerKVP&) MOODYCAMEL_NOEXCEPT;
+
+	struct ImplicitProducerHash
+	{
+		size_t capacity;
+		ImplicitProducerKVP* entries;
+		ImplicitProducerHash* prev;
+	};
+
+	inline void populate_initial_implicit_producer_hash()
+	{
+		MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) {
+			return;
+		}
+		else {
+			implicitProducerHashCount.store(0, std::memory_order_relaxed);
+			auto hash = &initialImplicitProducerHash;
+			hash->capacity = INITIAL_IMPLICIT_PRODUCER_HASH_SIZE;
+			hash->entries = &initialImplicitProducerHashEntries[0];
+			for (size_t i = 0; i != INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; ++i) {
+				initialImplicitProducerHashEntries[i].key.store(details::invalid_thread_id, std::memory_order_relaxed);
+			}
+			hash->prev = nullptr;
+			implicitProducerHash.store(hash, std::memory_order_relaxed);
+		}
+	}
+
+	void swap_implicit_producer_hashes(ConcurrentQueue& other)
+	{
+		MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) {
+			return;
+		}
+		else {
+			// Swap (assumes our implicit producer hash is initialized)
+			initialImplicitProducerHashEntries.swap(other.initialImplicitProducerHashEntries);
+			initialImplicitProducerHash.entries = &initialImplicitProducerHashEntries[0];
+			other.initialImplicitProducerHash.entries = &other.initialImplicitProducerHashEntries[0];
+
+			details::swap_relaxed(implicitProducerHashCount, other.implicitProducerHashCount);
+
+			details::swap_relaxed(implicitProducerHash, other.implicitProducerHash);
+			if (implicitProducerHash.load(std::memory_order_relaxed) == &other.initialImplicitProducerHash) {
+				implicitProducerHash.store(&initialImplicitProducerHash, std::memory_order_relaxed);
+			}
+			else {
+				ImplicitProducerHash* hash;
+				for (hash = implicitProducerHash.load(std::memory_order_relaxed); hash->prev != &other.initialImplicitProducerHash; hash = hash->prev) {
+					continue;
+				}
+				hash->prev = &initialImplicitProducerHash;
+			}
+			if (other.implicitProducerHash.load(std::memory_order_relaxed) == &initialImplicitProducerHash) {
+				other.implicitProducerHash.store(&other.initialImplicitProducerHash, std::memory_order_relaxed);
+			}
+			else {
+				ImplicitProducerHash* hash;
+				for (hash = other.implicitProducerHash.load(std::memory_order_relaxed); hash->prev != &initialImplicitProducerHash; hash = hash->prev) {
+					continue;
+				}
+				hash->prev = &other.initialImplicitProducerHash;
+			}
+		}
+	}
+
+	// Only fails (returns nullptr) if memory allocation fails
+	ImplicitProducer* get_or_add_implicit_producer()
+	{
+		// Note that since the data is essentially thread-local (key is thread ID),
+		// there's a reduced need for fences (memory ordering is already consistent
+		// for any individual thread), except for the current table itself.
+
+		// Start by looking for the thread ID in the current and all previous hash tables.
+		// If it's not found, it must not be in there yet, since this same thread would
+		// have added it previously to one of the tables that we traversed.
+
+		// Code and algorithm adapted from http://preshing.com/20130605/the-worlds-simplest-lock-free-hash-table
+
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
+		debug::DebugLock lock(implicitProdMutex);
+#endif
+
+		auto id = details::thread_id();
+		auto hashedId = details::hash_thread_id(id);
+
+		auto mainHash = implicitProducerHash.load(std::memory_order_acquire);
+		assert(mainHash != nullptr);  // silence clang-tidy and MSVC warnings (hash cannot be null)
+		for (auto hash = mainHash; hash != nullptr; hash = hash->prev) {
+			// Look for the id in this hash
+			auto index = hashedId;
+			while (true) {		// Not an infinite loop because at least one slot is free in the hash table
+				index &= hash->capacity - 1u;
+
+				auto probedKey = hash->entries[index].key.load(std::memory_order_relaxed);
+				if (probedKey == id) {
+					// Found it! If we had to search several hashes deep, though, we should lazily add it
+					// to the current main hash table to avoid the extended search next time.
+					// Note there's guaranteed to be room in the current hash table since every subsequent
+					// table implicitly reserves space for all previous tables (there's only one
+					// implicitProducerHashCount).
+					auto value = hash->entries[index].value;
+					if (hash != mainHash) {
+						index = hashedId;
+						while (true) {
+							index &= mainHash->capacity - 1u;
+							auto empty = details::invalid_thread_id;
+#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
+							auto reusable = details::invalid_thread_id2;
+							if (mainHash->entries[index].key.compare_exchange_strong(empty,    id, std::memory_order_seq_cst, std::memory_order_relaxed) ||
+								mainHash->entries[index].key.compare_exchange_strong(reusable, id, std::memory_order_seq_cst, std::memory_order_relaxed)) {
+#else
+							if (mainHash->entries[index].key.compare_exchange_strong(empty,    id, std::memory_order_seq_cst, std::memory_order_relaxed)) {
+#endif
+								mainHash->entries[index].value = value;
+								break;
+							}
+							++index;
+						}
+					}
+
+					return value;
+				}
+				if (probedKey == details::invalid_thread_id) {
+					break;		// Not in this hash table
+				}
+				++index;
+			}
+		}
+
+		// Insert!
+		auto newCount = 1 + implicitProducerHashCount.fetch_add(1, std::memory_order_relaxed);
+		while (true) {
+			// NOLINTNEXTLINE(clang-analyzer-core.NullDereference)
+			if (newCount >= (mainHash->capacity >> 1) && !implicitProducerHashResizeInProgress.test_and_set(std::memory_order_acquire)) {
+				// We've acquired the resize lock, try to allocate a bigger hash table.
+				// Note the acquire fence synchronizes with the release fence at the end of this block, and hence when
+				// we reload implicitProducerHash it must be the most recent version (it only gets changed within this
+				// locked block).
+				mainHash = implicitProducerHash.load(std::memory_order_acquire);
+				if (newCount >= (mainHash->capacity >> 1)) {
+					size_t newCapacity = mainHash->capacity << 1;
+					while (newCount >= (newCapacity >> 1)) {
+						newCapacity <<= 1;
+					}
+					auto raw = static_cast<char*>((Traits::malloc)(sizeof(ImplicitProducerHash) + std::alignment_of<ImplicitProducerKVP>::value - 1 + sizeof(ImplicitProducerKVP) * newCapacity));
+					if (raw == nullptr) {
+						// Allocation failed
+						implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed);
+						implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);
+						return nullptr;
+					}
+
+					auto newHash = new (raw) ImplicitProducerHash;
+					newHash->capacity = static_cast<size_t>(newCapacity);
+					newHash->entries = reinterpret_cast<ImplicitProducerKVP*>(details::align_for<ImplicitProducerKVP>(raw + sizeof(ImplicitProducerHash)));
+					for (size_t i = 0; i != newCapacity; ++i) {
+						new (newHash->entries + i) ImplicitProducerKVP;
+						newHash->entries[i].key.store(details::invalid_thread_id, std::memory_order_relaxed);
+					}
+					newHash->prev = mainHash;
+					implicitProducerHash.store(newHash, std::memory_order_release);
+					implicitProducerHashResizeInProgress.clear(std::memory_order_release);
+					mainHash = newHash;
+				}
+				else {
+					implicitProducerHashResizeInProgress.clear(std::memory_order_release);
+				}
+			}
+
+			// If it's < three-quarters full, add to the old one anyway so that we don't have to wait for the next table
+			// to finish being allocated by another thread (and if we just finished allocating above, the condition will
+			// always be true)
+			if (newCount < (mainHash->capacity >> 1) + (mainHash->capacity >> 2)) {
+				auto producer = static_cast<ImplicitProducer*>(recycle_or_create_producer(false));
+				if (producer == nullptr) {
+					implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed);
+					return nullptr;
+				}
+
+#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
+				producer->threadExitListener.callback = &ConcurrentQueue::implicit_producer_thread_exited_callback;
+				producer->threadExitListener.userData = producer;
+				details::ThreadExitNotifier::subscribe(&producer->threadExitListener);
+#endif
+
+				auto index = hashedId;
+				while (true) {
+					index &= mainHash->capacity - 1u;
+					auto empty = details::invalid_thread_id;
+#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
+					auto reusable = details::invalid_thread_id2;
+					if (mainHash->entries[index].key.compare_exchange_strong(reusable, id, std::memory_order_seq_cst, std::memory_order_relaxed)) {
+						implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed);  // already counted as a used slot
+						mainHash->entries[index].value = producer;
+						break;
+					}
+#endif
+					if (mainHash->entries[index].key.compare_exchange_strong(empty,    id, std::memory_order_seq_cst, std::memory_order_relaxed)) {
+						mainHash->entries[index].value = producer;
+						break;
+					}
+					++index;
+				}
+				return producer;
+			}
+
+			// Hmm, the old hash is quite full and somebody else is busy allocating a new one.
+			// We need to wait for the allocating thread to finish (if it succeeds, we add, if not,
+			// we try to allocate ourselves).
+			mainHash = implicitProducerHash.load(std::memory_order_acquire);
+		}
+	}
+
+#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
+	void implicit_producer_thread_exited(ImplicitProducer* producer)
+	{
+		// Remove from hash
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
+		debug::DebugLock lock(implicitProdMutex);
+#endif
+		auto hash = implicitProducerHash.load(std::memory_order_acquire);
+		assert(hash != nullptr);		// The thread exit listener is only registered if we were added to a hash in the first place
+		auto id = details::thread_id();
+		auto hashedId = details::hash_thread_id(id);
+		details::thread_id_t probedKey;
+
+		// We need to traverse all the hashes just in case other threads aren't on the current one yet and are
+		// trying to add an entry thinking there's a free slot (because they reused a producer)
+		for (; hash != nullptr; hash = hash->prev) {
+			auto index = hashedId;
+			do {
+				index &= hash->capacity - 1u;
+				probedKey = id;
+				if (hash->entries[index].key.compare_exchange_strong(probedKey, details::invalid_thread_id2, std::memory_order_seq_cst, std::memory_order_relaxed)) {
+					break;
+				}
+				++index;
+			} while (probedKey != details::invalid_thread_id);		// Can happen if the hash has changed but we weren't put back in it yet, or if we weren't added to this hash in the first place
+		}
+
+		// Mark the queue as being recyclable
+		producer->inactive.store(true, std::memory_order_release);
+	}
+
+	static void implicit_producer_thread_exited_callback(void* userData)
+	{
+		auto producer = static_cast<ImplicitProducer*>(userData);
+		auto queue = producer->parent;
+		queue->implicit_producer_thread_exited(producer);
+	}
+#endif
+
+	//////////////////////////////////
+	// Utility functions
+	//////////////////////////////////
+
+	template<typename TAlign>
+	static inline void* aligned_malloc(size_t size)
+	{
+		MOODYCAMEL_CONSTEXPR_IF (std::alignment_of<TAlign>::value <= std::alignment_of<details::max_align_t>::value)
+			return (Traits::malloc)(size);
+		else {
+			size_t alignment = std::alignment_of<TAlign>::value;
+			void* raw = (Traits::malloc)(size + alignment - 1 + sizeof(void*));
+			if (!raw)
+				return nullptr;
+			char* ptr = details::align_for<TAlign>(reinterpret_cast<char*>(raw) + sizeof(void*));
+			*(reinterpret_cast<void**>(ptr) - 1) = raw;
+			return ptr;
+		}
+	}
+
+	template<typename TAlign>
+	static inline void aligned_free(void* ptr)
+	{
+		MOODYCAMEL_CONSTEXPR_IF (std::alignment_of<TAlign>::value <= std::alignment_of<details::max_align_t>::value)
+			return (Traits::free)(ptr);
+		else
+			(Traits::free)(ptr ? *(reinterpret_cast<void**>(ptr) - 1) : nullptr);
+	}
+
+	template<typename U>
+	static inline U* create_array(size_t count)
+	{
+		assert(count > 0);
+		U* p = static_cast<U*>(aligned_malloc<U>(sizeof(U) * count));
+		if (p == nullptr)
+			return nullptr;
+
+		for (size_t i = 0; i != count; ++i)
+			new (p + i) U();
+		return p;
+	}
+
+	template<typename U>
+	static inline void destroy_array(U* p, size_t count)
+	{
+		if (p != nullptr) {
+			assert(count > 0);
+			for (size_t i = count; i != 0; )
+				(p + --i)->~U();
+		}
+		aligned_free<U>(p);
+	}
+
+	template<typename U>
+	static inline U* create()
+	{
+		void* p = aligned_malloc<U>(sizeof(U));
+		return p != nullptr ? new (p) U : nullptr;
+	}
+
+	template<typename U, typename A1>
+	static inline U* create(A1&& a1)
+	{
+		void* p = aligned_malloc<U>(sizeof(U));
+		return p != nullptr ? new (p) U(std::forward<A1>(a1)) : nullptr;
+	}
+
+	template<typename U>
+	static inline void destroy(U* p)
+	{
+		if (p != nullptr)
+			p->~U();
+		aligned_free<U>(p);
+	}
+
+private:
+	std::atomic<ProducerBase*> producerListTail;
+	std::atomic<std::uint32_t> producerCount;
+
+	std::atomic<size_t> initialBlockPoolIndex;
+	Block* initialBlockPool;
+	size_t initialBlockPoolSize;
+
+#ifndef MCDBGQ_USEDEBUGFREELIST
+	FreeList<Block> freeList;
+#else
+	debug::DebugFreeList<Block> freeList;
+#endif
+
+	std::atomic<ImplicitProducerHash*> implicitProducerHash;
+	std::atomic<size_t> implicitProducerHashCount;		// Number of slots logically used
+	ImplicitProducerHash initialImplicitProducerHash;
+	std::array<ImplicitProducerKVP, INITIAL_IMPLICIT_PRODUCER_HASH_SIZE> initialImplicitProducerHashEntries;
+	std::atomic_flag implicitProducerHashResizeInProgress;
+
+	std::atomic<std::uint32_t> nextExplicitConsumerId;
+	std::atomic<std::uint32_t> globalExplicitConsumerOffset;
+
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
+	debug::DebugMutex implicitProdMutex;
+#endif
+
+#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
+	std::atomic<ExplicitProducer*> explicitProducers;
+	std::atomic<ImplicitProducer*> implicitProducers;
+#endif
+};
+
+
+template<typename T, typename Traits>
+ProducerToken::ProducerToken(ConcurrentQueue<T, Traits>& queue)
+	: producer(queue.recycle_or_create_producer(true))
+{
+	if (producer != nullptr) {
+		producer->token = this;
+	}
+}
+
+template<typename T, typename Traits>
+ProducerToken::ProducerToken(BlockingConcurrentQueue<T, Traits>& queue)
+	: producer(reinterpret_cast<ConcurrentQueue<T, Traits>*>(&queue)->recycle_or_create_producer(true))
+{
+	if (producer != nullptr) {
+		producer->token = this;
+	}
+}
+
+template<typename T, typename Traits>
+ConsumerToken::ConsumerToken(ConcurrentQueue<T, Traits>& queue)
+	: itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr)
+{
+	initialOffset = queue.nextExplicitConsumerId.fetch_add(1, std::memory_order_release);
+	lastKnownGlobalOffset = static_cast<std::uint32_t>(-1);
+}
+
+template<typename T, typename Traits>
+ConsumerToken::ConsumerToken(BlockingConcurrentQueue<T, Traits>& queue)
+	: itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr)
+{
+	initialOffset = reinterpret_cast<ConcurrentQueue<T, Traits>*>(&queue)->nextExplicitConsumerId.fetch_add(1, std::memory_order_release);
+	lastKnownGlobalOffset = static_cast<std::uint32_t>(-1);
+}
+
+template<typename T, typename Traits>
+inline void swap(ConcurrentQueue<T, Traits>& a, ConcurrentQueue<T, Traits>& b) MOODYCAMEL_NOEXCEPT
+{
+	a.swap(b);
+}
+
+inline void swap(ProducerToken& a, ProducerToken& b) MOODYCAMEL_NOEXCEPT
+{
+	a.swap(b);
+}
+
+inline void swap(ConsumerToken& a, ConsumerToken& b) MOODYCAMEL_NOEXCEPT
+{
+	a.swap(b);
+}
+
+template<typename T, typename Traits>
+inline void swap(typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP& a, typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP& b) MOODYCAMEL_NOEXCEPT
+{
+	a.swap(b);
+}
+
+}
+
+#if defined(_MSC_VER) && (!defined(_HAS_CXX17) || !_HAS_CXX17)
+#pragma warning(pop)
+#endif
+
+#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
+#pragma GCC diagnostic pop
+#endif
--- a/third_party/dlpack/dlpack.h
+++ b/third_party/dlpack/dlpack.h
+// copy from:
+// https://github.com/dmlc/dlpack/blob/v0.7/include/dlpack/dlpack.h
+/*!
+ *  Copyright (c) 2017 by Contributors
+ * \file dlpack.h
+ * \brief The common header of DLPack.
+ */
+#ifndef DLPACK_DLPACK_H_
+#define DLPACK_DLPACK_H_
+
+/**
+ * \brief Compatibility with C++
+ */
+#ifdef __cplusplus
+#define DLPACK_EXTERN_C extern "C"
+#else
+#define DLPACK_EXTERN_C
+#endif
+
+/*! \brief The current version of dlpack */
+#define DLPACK_VERSION 70
+
+/*! \brief The current ABI version of dlpack */
+#define DLPACK_ABI_VERSION 1
+
+/*! \brief DLPACK_DLL prefix for windows */
+#ifdef _WIN32
+#ifdef DLPACK_EXPORTS
+#define DLPACK_DLL __declspec(dllexport)
+#else
+#define DLPACK_DLL __declspec(dllimport)
+#endif
+#else
+#define DLPACK_DLL
+#endif
+
+#include <stddef.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*!
+ * \brief The device type in DLDevice.
+ */
+#ifdef __cplusplus
+typedef enum : int32_t {
+#else
+typedef enum {
+#endif
+  /*! \brief CPU device */
+  kDLCPU = 1,
+  /*! \brief CUDA GPU device */
+  kDLCUDA = 2,
+  /*!
+   * \brief Pinned CUDA CPU memory by cudaMallocHost
+   */
+  kDLCUDAHost = 3,
+  /*! \brief OpenCL devices. */
+  kDLOpenCL = 4,
+  /*! \brief Vulkan buffer for next generation graphics. */
+  kDLVulkan = 7,
+  /*! \brief Metal for Apple GPU. */
+  kDLMetal = 8,
+  /*! \brief Verilog simulator buffer */
+  kDLVPI = 9,
+  /*! \brief ROCm GPUs for AMD GPUs */
+  kDLROCM = 10,
+  /*!
+   * \brief Pinned ROCm CPU memory allocated by hipMallocHost
+   */
+  kDLROCMHost = 11,
+  /*!
+   * \brief Reserved extension device type,
+   * used for quickly test extension device
+   * The semantics can differ depending on the implementation.
+   */
+  kDLExtDev = 12,
+  /*!
+   * \brief CUDA managed/unified memory allocated by cudaMallocManaged
+   */
+  kDLCUDAManaged = 13,
+  /*!
+   * \brief Unified shared memory allocated on a oneAPI non-partititioned
+   * device. Call to oneAPI runtime is required to determine the device
+   * type, the USM allocation type and the sycl context it is bound to.
+   *
+   */
+  kDLOneAPI = 14,
+  /*! \brief GPU support for next generation WebGPU standard. */
+  kDLWebGPU = 15,
+  /*! \brief Qualcomm Hexagon DSP */
+  kDLHexagon = 16,
+} DLDeviceType;
+
+/*!
+ * \brief A Device for Tensor and operator.
+ */
+typedef struct {
+  /*! \brief The device type used in the device. */
+  DLDeviceType device_type;
+  /*!
+   * \brief The device index.
+   * For vanilla CPU memory, pinned memory, or managed memory, this is set to 0.
+   */
+  int32_t device_id;
+} DLDevice;
+
+/*!
+ * \brief The type code options DLDataType.
+ */
+typedef enum {
+  /*! \brief signed integer */
+  kDLInt = 0U,
+  /*! \brief unsigned integer */
+  kDLUInt = 1U,
+  /*! \brief IEEE floating point */
+  kDLFloat = 2U,
+  /*!
+   * \brief Opaque handle type, reserved for testing purposes.
+   * Frameworks need to agree on the handle data type for the exchange to be well-defined.
+   */
+  kDLOpaqueHandle = 3U,
+  /*! \brief bfloat16 */
+  kDLBfloat = 4U,
+  /*!
+   * \brief complex number
+   * (C/C++/Python layout: compact struct per complex number)
+   */
+  kDLComplex = 5U,
+} DLDataTypeCode;
+
+/*!
+ * \brief The data type the tensor can hold. The data type is assumed to follow the
+ * native endian-ness. An explicit error message should be raised when attempting to
+ * export an array with non-native endianness
+ *
+ *  Examples
+ *   - float: type_code = 2, bits = 32, lanes=1
+ *   - float4(vectorized 4 float): type_code = 2, bits = 32, lanes=4
+ *   - int8: type_code = 0, bits = 8, lanes=1
+ *   - std::complex<float>: type_code = 5, bits = 64, lanes = 1
+ */
+typedef struct {
+  /*!
+   * \brief Type code of base types.
+   * We keep it uint8_t instead of DLDataTypeCode for minimal memory
+   * footprint, but the value should be one of DLDataTypeCode enum values.
+   * */
+  uint8_t code;
+  /*!
+   * \brief Number of bits, common choices are 8, 16, 32.
+   */
+  uint8_t bits;
+  /*! \brief Number of lanes in the type, used for vector types. */
+  uint16_t lanes;
+} DLDataType;
+
+/*!
+ * \brief Plain C Tensor object, does not manage memory.
+ */
+typedef struct {
+  /*!
+   * \brief The data pointer points to the allocated data. This will be CUDA
+   * device pointer or cl_mem handle in OpenCL. It may be opaque on some device
+   * types. This pointer is always aligned to 256 bytes as in CUDA. The
+   * `byte_offset` field should be used to point to the beginning of the data.
+   *
+   * Note that as of Nov 2021, multiply libraries (CuPy, PyTorch, TensorFlow,
+   * TVM, perhaps others) do not adhere to this 256 byte aligment requirement
+   * on CPU/CUDA/ROCm, and always use `byte_offset=0`.  This must be fixed
+   * (after which this note will be updated); at the moment it is recommended
+   * to not rely on the data pointer being correctly aligned.
+   *
+   * For given DLTensor, the size of memory required to store the contents of
+   * data is calculated as follows:
+   *
+   * \code{.c}
+   * static inline size_t GetDataSize(const DLTensor* t) {
+   *   size_t size = 1;
+   *   for (tvm_index_t i = 0; i < t->ndim; ++i) {
+   *     size *= t->shape[i];
+   *   }
+   *   size *= (t->dtype.bits * t->dtype.lanes + 7) / 8;
+   *   return size;
+   * }
+   * \endcode
+   */
+  void* data;
+  /*! \brief The device of the tensor */
+  DLDevice device;
+  /*! \brief Number of dimensions */
+  int32_t ndim;
+  /*! \brief The data type of the pointer*/
+  DLDataType dtype;
+  /*! \brief The shape of the tensor */
+  int64_t* shape;
+  /*!
+   * \brief strides of the tensor (in number of elements, not bytes)
+   *  can be NULL, indicating tensor is compact and row-majored.
+   */
+  int64_t* strides;
+  /*! \brief The offset in bytes to the beginning pointer to data */
+  uint64_t byte_offset;
+} DLTensor;
+
+/*!
+ * \brief C Tensor object, manage memory of DLTensor. This data structure is
+ *  intended to facilitate the borrowing of DLTensor by another framework. It is
+ *  not meant to transfer the tensor. When the borrowing framework doesn't need
+ *  the tensor, it should call the deleter to notify the host that the resource
+ *  is no longer needed.
+ */
+typedef struct DLManagedTensor {
+  /*! \brief DLTensor which is being memory managed */
+  DLTensor dl_tensor;
+  /*! \brief the context of the original host framework of DLManagedTensor in
+   *   which DLManagedTensor is used in the framework. It can also be NULL.
+   */
+  void* manager_ctx;
+  /*! \brief Destructor signature void (*)(void*) - this should be called
+   *   to destruct manager_ctx which holds the DLManagedTensor. It can be NULL
+   *   if there is no way for the caller to provide a reasonable destructor.
+   *   The destructors deletes the argument self as well.
+   */
+  void (*deleter)(struct DLManagedTensor* self);
+} DLManagedTensor;
+#ifdef __cplusplus
+}  // DLPACK_EXTERN_C
+#endif
+#endif  // DLPACK_DLPACK_H_
--- a/third_party/json/json.hpp
+++ b/third_party/json/json.hpp
--- a/third_party/outcome/outcome-experimental.hpp
+++ b/third_party/outcome/outcome-experimental.hpp