// UString.h


/*
 *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
 *  Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
 *  Copyright (c) 2009, Google Inc. All rights reserved.
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Library General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Library General Public License for more details.
 *
 *  You should have received a copy of the GNU Library General Public License
 *  along with this library; see the file COPYING.LIB.  If not, write to
 *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 *  Boston, MA 02110-1301, USA.
 *
 */

#ifndef UString_h
#define UString_h

#include "Collector.h"
#include <limits>
#include <stdint.h>
#include <string.h>
#include <wtf/Assertions.h>
#include <wtf/PassRefPtr.h>
#include <wtf/PtrAndFlags.h>
#include <wtf/RefPtr.h>
#include <wtf/Vector.h>
#include <wtf/unicode/Unicode.h>

namespace JSC {

    // Bring the placement-new adoption tag type and its value into JSC; they are used by
    // the UString(PlacementNewAdoptType) constructor below.
    using WTF::PlacementNewAdoptType;
    using WTF::PlacementNewAdopt;

    // Forward declaration; UString::Rep stores an IdentifierTable pointer alongside its flags.
    class IdentifierTable;
  
    // A byte string described by a length and a character buffer.
    // Only the default constructor, operator+=, size() and c_str() are defined in this
    // header; the remaining members are implemented elsewhere.
    class CString {
    public:
        // Default state: zero length and a null data pointer.
        CString()
            : m_length(0), m_data(0)
        {
        }

        // Construction from a character pointer, optionally with an explicit length.
        CString(const char*);
        CString(const char*, size_t);

        // Copy construction.
        CString(const CString&);

        ~CString();

        // Builds a CString from an existing buffer.
        // The buffer should be allocated with new[].
        static CString adopt(char*, size_t);

        CString& append(const CString&);

        CString& operator=(const char* c);
        CString& operator=(const CString&);

        // Shorthand for append().
        CString& operator+=(const CString& c)
        {
            return append(c);
        }

        // Stored length.
        size_t size() const
        {
            return m_length;
        }

        // Stored buffer pointer; null for a default-constructed CString.
        const char* c_str() const
        {
            return m_data;
        }

    private:
        size_t m_length;
        char* m_data;
    };

    // Character buffer type taken by UString::getCString(); a Vector<char> instantiated
    // with 32 as its second template argument.
    typedef Vector<char, 32> CStringBuffer;

    // A reference-counted string of UChar code units. A UString holds a RefPtr to a Rep;
    // each Rep describes a window (offset, len) into the buffer owned by a BaseString.
    class UString {
        friend class JIT;

    public:
        struct BaseString;

        // A window onto a BaseString's buffer, carrying its own reference count, cached hash
        // and identifier-table/flag word. A BaseString is itself a Rep that is its own base.
        struct Rep : Noncopyable {
            friend class JIT;

            // Creates a BaseString-backed Rep around 'buffer' holding 'length' characters,
            // with no additional capacity.
            static PassRefPtr<Rep> create(UChar* buffer, int length)
            {
                return adoptRef(new BaseString(buffer, length));
            }

            // Creates a zero-length Rep whose buffer has room for 'size' characters.
            // If 'size' is too large or the allocation fails, the returned Rep has a null
            // buffer and zero capacity.
            static PassRefPtr<Rep> createEmptyBuffer(size_t size)
            {
                // Guard against integer overflow, both in the byte count passed to the
                // allocator and in the int capacity recorded by BaseString.
                if (size <= static_cast<size_t>(std::numeric_limits<int>::max())
                    && size < (std::numeric_limits<size_t>::max() / sizeof(UChar))) {
                    if (void* buf = tryFastMalloc(size * sizeof(UChar)))
                        return adoptRef(new BaseString(static_cast<UChar*>(buf), 0, static_cast<int>(size)));
                }
                return adoptRef(new BaseString(0, 0, 0));
            }

            static PassRefPtr<Rep> createCopying(const UChar*, int);

            // Creates a Rep that refers to 'length' characters of base's BaseString, starting
            // 'offset' characters from where 'base' starts. Defined inline later in this header.
            static PassRefPtr<Rep> create(PassRefPtr<Rep> base, int offset, int length);

            // Constructs a string from a UTF-8 string, using strict conversion (see comments in UTF8.h).
            // Returns UString::Rep::null for null input or conversion failure.
            static PassRefPtr<Rep> createFromUTF8(const char*);

            // Called by deref() when the reference count reaches zero.
            void destroy();

            // True when BaseStringFlag is set, which the BaseString constructor does.
            bool baseIsSelf() const { return m_identifierTableAndFlags.isFlagSet(BaseStringFlag); }
            UChar* data() const;
            int size() const { return len; }

            // Returns the hash, computing and caching it on first use. A cached value of 0
            // is treated as "not computed yet".
            unsigned hash() const { if (_hash == 0) _hash = computeHash(data(), len); return _hash; }
            unsigned computedHash() const { ASSERT(_hash); return _hash; } // fast path for Identifiers

            static unsigned computeHash(const UChar*, int length);
            static unsigned computeHash(const char*, int length);
            // Convenience overload for NUL-terminated input; the length is narrowed to int
            // explicitly because the overload above takes an int.
            static unsigned computeHash(const char* s) { return computeHash(s, static_cast<int>(strlen(s))); }

            IdentifierTable* identifierTable() const { return m_identifierTableAndFlags.get(); }
            void setIdentifierTable(IdentifierTable* table) { ASSERT(!isStatic()); m_identifierTableAndFlags.set(table); }

            bool isStatic() const { return m_identifierTableAndFlags.isFlagSet(StaticFlag); }
            void setStatic(bool);
            void setBaseString(PassRefPtr<BaseString>);
            BaseString* baseString();
            const BaseString* baseString() const;

            // Manual reference counting; deref() destroys the Rep when the count reaches zero.
            Rep* ref() { ++rc; return this; }
            ALWAYS_INLINE void deref() { if (--rc == 0) destroy(); }

            // Defined as an empty inline function later in this header when NDEBUG is defined.
            void checkConsistency() const;

            // Flag bits stored alongside the IdentifierTable pointer.
            enum UStringFlags {
                StaticFlag,
                BaseStringFlag
            };

            // unshared data
            int offset;
            int len;
            int rc; // For null and empty static strings, this field does not reflect a correct count, because ref/deref are not thread-safe. A special case in destroy() guarantees that these do not get deleted.
            mutable unsigned _hash;
            PtrAndFlags<IdentifierTable, UStringFlags> m_identifierTableAndFlags;

            static BaseString& null() { return *nullBaseString; }
            static BaseString& empty() { return *emptyBaseString; }

            bool reserveCapacity(int capacity);

        protected:
            // constructor for use by BaseString subclass; they are their own bases
            Rep(int length)
                : offset(0)
                , len(length)
                , rc(1)
                , _hash(0)
                , m_baseString(static_cast<BaseString*>(this))
            {
            }

            // Constructor for a Rep that refers into another BaseString; takes over the
            // reference carried by 'base'.
            Rep(PassRefPtr<BaseString> base, int offsetInBase, int length)
                : offset(offsetInBase)
                , len(length)
                , rc(1)
                , _hash(0)
                , m_baseString(base.releaseRef())
            {
                checkConsistency();
            }


            BaseString* m_baseString;

        private:
            // For SmallStringsStorage which allocates an array and does initialization manually.
            Rep() { }

            friend class SmallStringsStorage;
            friend void initializeUString();
            JS_EXPORTDATA static BaseString* nullBaseString;
            JS_EXPORTDATA static BaseString* emptyBaseString;
        };


        // A Rep that also owns the character buffer and tracks how much of it is in use.
        struct BaseString : public Rep {
            bool isShared() { return rc != 1; }

            // potentially shared data.
            UChar* buf;
            int preCapacity;
            int usedPreCapacity;
            int capacity;
            int usedCapacity;

            // Last capacity (in bytes) returned through UString::cost().
            size_t reportedCost;

        private:
            // Wraps 'buffer'; capacity is length + additionalCapacity and the used capacity
            // equals 'length'. Sets BaseStringFlag so that baseIsSelf() reports true.
            BaseString(UChar* buffer, int length, int additionalCapacity = 0)
                : Rep(length)
                , buf(buffer)
                , preCapacity(0)
                , usedPreCapacity(0)
                , capacity(length + additionalCapacity)
                , usedCapacity(length)
                , reportedCost(0)
            {
                m_identifierTableAndFlags.setFlag(BaseStringFlag);
                checkConsistency();
            }

            friend struct Rep;
            friend class SmallStringsStorage;
            friend void initializeUString();
        };

    public:
        // Default construction refers to the shared null Rep (defined inline later in this header).
        UString();
        UString(const char*);
        UString(const UChar*, int length);
        UString(UChar*, int length, bool copy);

        UString(const UString& s)
            : m_rep(s.m_rep)
        {
        }

        UString(const Vector<UChar>& buffer);

        ~UString()
        {
        }

        // Special constructor for cases where we overwrite an object in place.
        UString(PlacementNewAdoptType)
            : m_rep(PlacementNewAdopt)
        {
        }

        // Conversions from numbers to strings.
        static UString from(int);
        static UString from(unsigned int);
        static UString from(long);
        static UString from(double);

        // A (position, length) pair identifying a substring.
        struct Range {
        public:
            Range(int pos, int len)
                : position(pos)
                , length(len)
            {
            }

            // Leaves position and length uninitialized.
            Range()
            {
            }

            int position;
            int length;
        };

        UString spliceSubstringsWithSeparators(const Range* substringRanges, int rangeCount, const UString* separators, int separatorCount) const;

        UString replaceRange(int rangeStart, int RangeEnd, const UString& replacement) const;

        UString& append(const UString&);
        UString& append(const char*);
        UString& append(UChar);
        // Widens through unsigned char so that a char with the high bit set is not sign-extended.
        UString& append(char c) { return append(static_cast<UChar>(static_cast<unsigned char>(c))); }
        UString& append(const UChar*, int size);
        UString& appendNumeric(int);
        UString& appendNumeric(double);

        bool getCString(CStringBuffer&) const;

        // NOTE: This method should only be used for *debugging* purposes as it
        // is neither Unicode safe nor free from side effects nor thread-safe.
        char* ascii() const;

        /**
         * Convert the string to UTF-8, assuming it is UTF-16 encoded.
         * In non-strict mode, this function is tolerant of badly formed UTF-16, it
         * can create UTF-8 strings that are invalid because they have characters in
         * the range U+D800-U+DFFF, U+FFFE, or U+FFFF, but the UTF-8 string is
         * guaranteed to be otherwise valid.
         * In strict mode, error is returned as null CString.
         */
        CString UTF8String(bool strict = false) const;

        UString& operator=(const char* c);

        UString& operator+=(const UString& s) { return append(s); }
        UString& operator+=(const char* s) { return append(s); }

        const UChar* data() const { return m_rep->data(); }

        // Null means "refers to the shared null Rep"; empty means "length is zero".
        bool isNull() const { return (m_rep == &Rep::null()); }
        bool isEmpty() const { return (!m_rep->len); }

        bool is8Bit() const;

        int size() const { return m_rep->size(); }

        UChar operator[](int pos) const;

        double toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const;
        double toDouble(bool tolerateTrailingJunk) const;
        double toDouble() const;

        uint32_t toUInt32(bool* ok = 0) const;
        uint32_t toUInt32(bool* ok, bool tolerateEmptyString) const;
        uint32_t toStrictUInt32(bool* ok = 0) const;

        unsigned toArrayIndex(bool* ok = 0) const;

        int find(const UString& f, int pos = 0) const;
        int find(UChar, int pos = 0) const;
        int rfind(const UString& f, int pos) const;
        int rfind(UChar, int pos) const;

        UString substr(int pos = 0, int len = -1) const;

        static const UString& null() { return *nullUString; }

        Rep* rep() const { return m_rep.get(); }
        static Rep* nullRep();

        // Wraps an existing Rep; the Rep must not be null.
        UString(PassRefPtr<Rep> r)
            : m_rep(r)
        {
            ASSERT(m_rep);
        }

        size_t cost() const;

        // Attempt to grow this string such that it can grow to a total length of 'capacity'
        // without reallocation.  This may fail for a number of reasons - if the BaseString is
        // shared and another string is using part of the capacity beyond our end point, if
        // the realloc fails, or if this string is empty and has no storage.
        //
        // This method returns a boolean indicating success.
        bool reserveCapacity(int capacity)
        {
            return m_rep->reserveCapacity(capacity);
        }

    private:
        void expandCapacity(int requiredLength);
        void expandPreCapacity(int requiredPreCap);
        void makeNull();

        RefPtr<Rep> m_rep;
        static UString* nullUString;

        friend void initializeUString();
        friend bool operator==(const UString&, const UString&);
        friend PassRefPtr<Rep> concatenate(Rep*, Rep*); // returns 0 if out of memory
    };
    // Concatenation primitives operating directly on Reps; implemented outside this header.
    // The friend declaration of the (Rep*, Rep*) overload inside UString notes that it
    // returns 0 if out of memory.
    PassRefPtr<UString::Rep> concatenate(UString::Rep*, UString::Rep*);
    PassRefPtr<UString::Rep> concatenate(UString::Rep*, int);
    PassRefPtr<UString::Rep> concatenate(UString::Rep*, double);

    // Equality of two UStrings; declared a friend of UString and implemented outside this header.
    bool operator==(const UString&, const UString&);

    // Inequality of two UStrings, expressed as the negation of JSC::operator==.
    inline bool operator!=(const UString& s1, const UString& s2)
    {
        const bool same = JSC::operator==(s1, s2);
        return !same;
    }

    // Ordering comparisons between two UStrings; implemented outside this header.
    bool operator<(const UString& s1, const UString& s2);
    bool operator>(const UString& s1, const UString& s2);

    // Equality between a UString and a C string; implemented outside this header.
    bool operator==(const UString& s1, const char* s2);

    // Inequality between a UString and a C string, expressed as the negation of JSC::operator==.
    inline bool operator!=(const UString& s1, const char* s2)
    {
        const bool same = JSC::operator==(s1, s2);
        return !same;
    }

    // Equality with the C string on the left; forwards to the overload that takes the
    // UString first by swapping the arguments.
    inline bool operator==(const char *s1, const UString& s2)
    {
        const bool same = operator==(s2, s1);
        return same;
    }

    // Inequality with the C string on the left, expressed as the negation of JSC::operator==.
    inline bool operator!=(const char *s1, const UString& s2)
    {
        const bool same = JSC::operator==(s1, s2);
        return !same;
    }

    // Equality of two CStrings; implemented outside this header.
    bool operator==(const CString&, const CString&);

    // Concatenates two UStrings. When concatenate() yields a null Rep, the result wraps
    // UString::nullRep() instead.
    inline UString operator+(const UString& s1, const UString& s2)
    {
        RefPtr<UString::Rep> joined = concatenate(s1.rep(), s2.rep());
        if (joined)
            return UString(joined.release());
        return UString(UString::nullRep());
    }

    // Three-way style comparison of two UStrings returning an int; implemented outside this header.
    int compare(const UString&, const UString&);

    // Equality of two Reps; used by the WTF::StrHash specializations at the end of this header.
    bool equal(const UString::Rep*, const UString::Rep*);

    // Creates a Rep covering 'length' characters of rep's BaseString, beginning 'offset'
    // characters after the start of 'rep'. The new Rep is attached to the BaseString
    // directly; its stored offset is rep's offset plus 'offset'.
    inline PassRefPtr<UString::Rep> UString::Rep::create(PassRefPtr<UString::Rep> rep, int offset, int length)
    {
        ASSERT(rep);
        rep->checkConsistency();

        // Translate the caller's offset (relative to 'rep') into one relative to the base.
        int offsetInBase = rep->offset + offset;

        PassRefPtr<BaseString> base = rep->baseString();

        // The requested window must lie inside the used part of the base buffer.
        ASSERT(-offsetInBase <= base->usedPreCapacity);
        ASSERT(offsetInBase + length <= base->usedCapacity);

        // adoptRef takes over the single reference the new Rep starts with.
        return adoptRef(new Rep(base, offsetInBase, length));
    }

    // Pointer to this Rep's first character: the base buffer, advanced past the
    // pre-capacity region and then by this Rep's offset.
    inline UChar* UString::Rep::data() const
    {
        const BaseString* owner = baseString();
        UChar* origin = owner->buf + owner->preCapacity;
        return origin + offset;
    }

    // Sets or clears StaticFlag. Asserts that no identifier table is attached.
    inline void UString::Rep::setStatic(bool v)
    {
        ASSERT(!identifierTable());
        if (!v) {
            m_identifierTableAndFlags.clearFlag(StaticFlag);
            return;
        }
        m_identifierTableAndFlags.setFlag(StaticFlag);
    }

    // Points this Rep at a different BaseString, taking over the reference carried by 'base'.
    // Asserts that 'base' is not this Rep and that this Rep is not its own base.
    inline void UString::Rep::setBaseString(PassRefPtr<BaseString> base)
    {
        ASSERT(base != this);
        ASSERT(!baseIsSelf());
        BaseString* adopted = base.releaseRef();
        m_baseString = adopted;
    }

    // Mutable access to the BaseString this Rep refers to.
    inline UString::BaseString* UString::Rep::baseString()
    {
        BaseString* const owner = m_baseString;
        return owner;
    }

    // Read-only access to the BaseString this Rep refers to.
    inline const UString::BaseString* UString::Rep::baseString() const
    {
        const BaseString* const owner = m_baseString;
        return owner;
    }

#ifdef NDEBUG
    // With NDEBUG defined the consistency check is an inline no-op. This header provides
    // no definition for builds without NDEBUG.
    inline void UString::Rep::checkConsistency() const
    {
    }
#endif

    // A default-constructed UString refers to the shared null Rep, so isNull() is true.
    inline UString::UString()
        : m_rep(&UString::Rep::null())
    {
    }

    // Array-index rule from ECMA-262 section 15.4: the string must exactly match the
    // string form of an unsigned integer, and the value must be less than 2^32 - 1.
    // The parsed value is returned either way; when 'ok' is supplied it is cleared for
    // a value of 2^32 - 1 or above.
    inline unsigned UString::toArrayIndex(bool* ok) const
    {
        const unsigned index = toStrictUInt32(ok);
        if (!ok || index < 0xFFFFFFFFU)
            return index;
        *ok = false;
        return index;
    }

    // Size threshold derived from Heap::minExtraCostSize divided by sizeof(UChar);
    // UString::cost() compares against it before reporting extra cost.
    // We'd rather not do shared substring append for small strings, since
    // this runs too much risk of a tiny initial string holding down a
    // huge buffer.
    // FIXME: this should be size_t but that would cause warnings until we
    // fix UString sizes to be size_t instead of int
    static const int minShareSize = Heap::minExtraCostSize / sizeof(UChar);

    // Returns the number of bytes of buffer capacity not yet reported through this
    // function, or 0 when that amount is below the minShareSize threshold. When a
    // non-zero amount is returned, the base string's reportedCost is advanced to the
    // current capacity so the same bytes are not reported twice.
    inline size_t UString::cost() const
    {
        BaseString* base = m_rep->baseString();
        // Widen each int before adding so that capacity + preCapacity cannot overflow int.
        size_t capacity = (static_cast<size_t>(base->capacity) + static_cast<size_t>(base->preCapacity)) * sizeof(UChar);
        size_t reportedCost = base->reportedCost;
        ASSERT(capacity >= reportedCost);

        size_t capacityDelta = capacity - reportedCost;

        // NOTE(review): capacityDelta is in bytes while minShareSize is
        // Heap::minExtraCostSize / sizeof(UChar); confirm the intended unit of this threshold.
        if (capacityDelta < static_cast<size_t>(minShareSize))
            return 0;

        base->reportedCost = capacity;

        return capacityDelta;
    }

    // Hash functions for Reps that reuse the hash already cached on the Rep
    // (computedHash() asserts that it has been computed). Everything else comes from PtrHash.
    struct IdentifierRepHash : PtrHash<RefPtr<JSC::UString::Rep> > {
        static unsigned hash(const RefPtr<JSC::UString::Rep>& key)
        {
            return key->computedHash();
        }

        static unsigned hash(JSC::UString::Rep* key)
        {
            return key->computedHash();
        }
    };

    // Declared a friend of UString, UString::Rep and UString::BaseString; implemented
    // outside this header.
    // NOTE(review): presumably sets up the static null/empty strings -- confirm in UString.cpp.
    void initializeUString();
} // namespace JSC

namespace WTF {

    template<typename T> struct DefaultHash;
    template<typename T> struct StrHash;

    // Hash traits for raw Rep pointers: hashing uses Rep::hash() and equality uses
    // JSC::equal(), rather than the pointer value.
    template<> struct StrHash<JSC::UString::Rep*> {
        static unsigned hash(const JSC::UString::Rep* key)
        {
            return key->hash();
        }

        static bool equal(const JSC::UString::Rep* a, const JSC::UString::Rep* b)
        {
            return JSC::equal(a, b);
        }

        static const bool safeToCompareToEmptyOrDeleted = false;
    };

    // Hash traits for RefPtr<Rep>: keeps the raw-pointer overloads visible and adds
    // overloads for every mix of RefPtr and raw pointer arguments.
    template<> struct StrHash<RefPtr<JSC::UString::Rep> > : public StrHash<JSC::UString::Rep*> {
        using StrHash<JSC::UString::Rep*>::hash;
        static unsigned hash(const RefPtr<JSC::UString::Rep>& key)
        {
            return key->hash();
        }

        using StrHash<JSC::UString::Rep*>::equal;
        static bool equal(const RefPtr<JSC::UString::Rep>& a, const RefPtr<JSC::UString::Rep>& b)
        {
            return JSC::equal(a.get(), b.get());
        }

        static bool equal(const JSC::UString::Rep* a, const RefPtr<JSC::UString::Rep>& b)
        {
            return JSC::equal(a, b.get());
        }

        static bool equal(const RefPtr<JSC::UString::Rep>& a, const JSC::UString::Rep* b)
        {
            return JSC::equal(a.get(), b);
        }

        static const bool safeToCompareToEmptyOrDeleted = false;
    };

    // Select the StrHash specializations above as the default hash for both key types.
    template<> struct DefaultHash<JSC::UString::Rep*> {
        typedef StrHash<JSC::UString::Rep*> Hash;
    };

    template<> struct DefaultHash<RefPtr<JSC::UString::Rep> > {
        typedef StrHash<RefPtr<JSC::UString::Rep> > Hash;

    };

} // namespace WTF

#endif