001/*
002 * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
003 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
004 *
005 * This code is free software; you can redistribute it and/or modify it
006 * under the terms of the GNU General Public License version 2 only, as
007 * published by the Free Software Foundation.
008 *
009 * This code is distributed in the hope that it will be useful, but WITHOUT
010 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
011 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
012 * version 2 for more details (a copy is included in the LICENSE file that
013 * accompanied this code).
014 *
015 * You should have received a copy of the GNU General Public License version
016 * 2 along with this work; if not, write to the Free Software Foundation,
017 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
018 *
019 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
020 * or visit www.oracle.com if you need additional information or have any
021 * questions.
022 */
023package com.oracle.graal.hotspot.replacements.arraycopy;
024
025import static com.oracle.graal.hotspot.HotSpotGraalRuntime.*;
026import static com.oracle.graal.hotspot.replacements.HotSpotReplacementsUtil.*;
027import static com.oracle.graal.nodes.NamedLocationIdentity.*;
028import static com.oracle.graal.nodes.extended.BranchProbabilityNode.*;
029import static com.oracle.graal.replacements.SnippetTemplate.*;
030import jdk.internal.jvmci.code.*;
031import jdk.internal.jvmci.meta.*;
032
033import com.oracle.graal.api.replacements.*;
034import com.oracle.graal.asm.*;
035import com.oracle.graal.hotspot.meta.*;
036import com.oracle.graal.hotspot.phases.*;
037import com.oracle.graal.nodes.*;
038import com.oracle.graal.nodes.extended.*;
039import com.oracle.graal.nodes.spi.*;
040import com.oracle.graal.replacements.*;
041import com.oracle.graal.replacements.SnippetTemplate.AbstractTemplates;
042import com.oracle.graal.replacements.SnippetTemplate.Arguments;
043import com.oracle.graal.replacements.SnippetTemplate.SnippetInfo;
044import com.oracle.graal.replacements.nodes.*;
045import com.oracle.graal.word.*;
046
047/**
048 * As opposed to {@link ArrayCopySnippets}, these Snippets do <b>not</b> perform store checks.
049 */
050public class UnsafeArrayCopySnippets implements Snippets {
051    private static final boolean supportsUnalignedMemoryAccess = runtime().getTarget().arch.supportsUnalignedMemoryAccess();
052
053    private static final Kind VECTOR_KIND = Kind.Long;
054    private static final long VECTOR_SIZE = arrayIndexScale(VECTOR_KIND);
055
    /**
     * Copies {@code length} elements of kind {@code baseKind} from {@code src[srcPos]} to
     * {@code dest[destPos]} without store checks, coalescing adjacent elements into
     * {@link #VECTOR_KIND} (long) accesses where alignment permits.
     *
     * The copied byte range is partitioned into three parts: a pre-loop copying single elements
     * up to the first vector-aligned boundary, a main loop copying whole vectors, and a
     * post-loop copying the remaining elements.
     *
     * @param src source array
     * @param srcPos starting element index in {@code src}
     * @param dest destination array (may alias {@code src})
     * @param destPos starting element index in {@code dest}
     * @param length number of elements to copy
     * @param baseKind element kind of both arrays
     * @param locationIdentity memory location identity used for all reads and writes
     */
    private static void vectorizedCopy(Object src, int srcPos, Object dest, int destPos, int length, Kind baseKind, LocationIdentity locationIdentity) {
        int arrayBaseOffset = arrayBaseOffset(baseKind);
        int elementSize = arrayIndexScale(baseKind);
        // Widen to long before multiplying so large arrays cannot overflow int arithmetic.
        long byteLength = (long) length * elementSize;
        long srcOffset = (long) srcPos * elementSize;
        long destOffset = (long) destPos * elementSize;

        long preLoopBytes;
        long mainLoopBytes;
        long postLoopBytes;

        // We can easily vectorize the loop if both offsets have the same alignment.
        if (byteLength >= VECTOR_SIZE && (srcOffset % VECTOR_SIZE) == (destOffset % VECTOR_SIZE)) {
            // Pre-loop runs element-wise up to the first vector boundary of the source address;
            // since src and dest agree modulo VECTOR_SIZE, this aligns both sides at once.
            preLoopBytes = NumUtil.roundUp(arrayBaseOffset + srcOffset, VECTOR_SIZE) - (arrayBaseOffset + srcOffset);
            postLoopBytes = (byteLength - preLoopBytes) % VECTOR_SIZE;
            mainLoopBytes = byteLength - preLoopBytes - postLoopBytes;
        } else {
            // Does the architecture support unaligned memory accesses?
            if (supportsUnalignedMemoryAccess) {
                // Yes: vectorize everything except the sub-vector remainder, alignment-free.
                preLoopBytes = byteLength % VECTOR_SIZE;
                mainLoopBytes = byteLength - preLoopBytes;
                postLoopBytes = 0;
            } else {
                // No. Let's do element-wise copying.
                preLoopBytes = byteLength;
                mainLoopBytes = 0;
                postLoopBytes = 0;
            }
        }

        if (probability(NOT_FREQUENT_PROBABILITY, src == dest) && probability(NOT_FREQUENT_PROBABILITY, srcPos < destPos)) {
            // Bad aliased case: a forward copy would overwrite source elements before they are
            // read, so copy backwards from the end. The three loops therefore run in reverse
            // order (post, main, pre) and decrement the offsets before each access.
            srcOffset += byteLength;
            destOffset += byteLength;

            // Post-loop
            for (long i = 0; i < postLoopBytes; i += elementSize) {
                srcOffset -= elementSize;
                destOffset -= elementSize;
                UnsafeCopyNode.copy(src, arrayBaseOffset + srcOffset, dest, arrayBaseOffset + destOffset, baseKind, locationIdentity);
            }
            // Main-loop
            for (long i = 0; i < mainLoopBytes; i += VECTOR_SIZE) {
                srcOffset -= VECTOR_SIZE;
                destOffset -= VECTOR_SIZE;
                UnsafeCopyNode.copy(src, arrayBaseOffset + srcOffset, dest, arrayBaseOffset + destOffset, VECTOR_KIND, locationIdentity);
            }
            // Pre-loop
            for (long i = 0; i < preLoopBytes; i += elementSize) {
                srcOffset -= elementSize;
                destOffset -= elementSize;
                UnsafeCopyNode.copy(src, arrayBaseOffset + srcOffset, dest, arrayBaseOffset + destOffset, baseKind, locationIdentity);
            }
        } else {
            // Non-overlapping (or benign overlap) case: straightforward forward copy.
            // Pre-loop
            for (long i = 0; i < preLoopBytes; i += elementSize) {
                UnsafeCopyNode.copy(src, arrayBaseOffset + srcOffset, dest, arrayBaseOffset + destOffset, baseKind, locationIdentity);
                srcOffset += elementSize;
                destOffset += elementSize;
            }
            // Main-loop
            for (long i = 0; i < mainLoopBytes; i += VECTOR_SIZE) {
                UnsafeCopyNode.copy(src, arrayBaseOffset + srcOffset, dest, arrayBaseOffset + destOffset, VECTOR_KIND, locationIdentity);
                srcOffset += VECTOR_SIZE;
                destOffset += VECTOR_SIZE;
            }
            // Post-loop
            for (long i = 0; i < postLoopBytes; i += elementSize) {
                UnsafeCopyNode.copy(src, arrayBaseOffset + srcOffset, dest, arrayBaseOffset + destOffset, baseKind, locationIdentity);
                srcOffset += elementSize;
                destOffset += elementSize;
            }
        }
    }
130
    /**
     * Returns the {@link LocationIdentity} for arrays with the given element kind. Annotated
     * {@code @Fold} so the call is replaced by its constant result when the snippets are
     * processed, rather than remaining a runtime call.
     */
    @Fold
    private static LocationIdentity getArrayLocation(Kind kind) {
        return NamedLocationIdentity.getArrayLocation(kind);
    }
135
136    @Snippet
137    public static void arraycopyByte(byte[] src, int srcPos, byte[] dest, int destPos, int length) {
138        Kind kind = Kind.Byte;
139        vectorizedCopy(src, srcPos, dest, destPos, length, kind, getArrayLocation(kind));
140    }
141
142    @Snippet
143    public static void arraycopyBoolean(boolean[] src, int srcPos, boolean[] dest, int destPos, int length) {
144        Kind kind = Kind.Boolean;
145        vectorizedCopy(src, srcPos, dest, destPos, length, kind, getArrayLocation(kind));
146    }
147
148    @Snippet
149    public static void arraycopyChar(char[] src, int srcPos, char[] dest, int destPos, int length) {
150        Kind kind = Kind.Char;
151        vectorizedCopy(src, srcPos, dest, destPos, length, kind, getArrayLocation(kind));
152    }
153
154    @Snippet
155    public static void arraycopyShort(short[] src, int srcPos, short[] dest, int destPos, int length) {
156        Kind kind = Kind.Short;
157        vectorizedCopy(src, srcPos, dest, destPos, length, kind, getArrayLocation(kind));
158    }
159
160    @Snippet
161    public static void arraycopyInt(int[] src, int srcPos, int[] dest, int destPos, int length) {
162        Kind kind = Kind.Int;
163        vectorizedCopy(src, srcPos, dest, destPos, length, kind, getArrayLocation(kind));
164    }
165
166    @Snippet
167    public static void arraycopyFloat(float[] src, int srcPos, float[] dest, int destPos, int length) {
168        Kind kind = Kind.Float;
169        vectorizedCopy(src, srcPos, dest, destPos, length, kind, getArrayLocation(kind));
170    }
171
172    @Snippet
173    public static void arraycopyLong(long[] src, int srcPos, long[] dest, int destPos, int length) {
174        Kind kind = Kind.Long;
175        vectorizedCopy(src, srcPos, dest, destPos, length, kind, getArrayLocation(kind));
176    }
177
178    @Snippet
179    public static void arraycopyDouble(double[] src, int srcPos, double[] dest, int destPos, int length) {
180        Kind kind = Kind.Double;
181        /*
182         * TODO atomicity problem on 32-bit architectures: The JVM spec requires double values to be
183         * copied atomically, but not long values. For example, on Intel 32-bit this code is not
184         * atomic as long as the vector kind remains Kind.Long.
185         */
186        vectorizedCopy(src, srcPos, dest, destPos, length, kind, getArrayLocation(kind));
187    }
188
189    /**
190     * For this kind, Object, we want to avoid write barriers between writes, but instead have them
191     * at the end of the snippet. This is done by using {@link DirectObjectStoreNode}, and rely on
192     * {@link WriteBarrierAdditionPhase} to put write barriers after the {@link UnsafeArrayCopyNode}
193     * with kind Object.
194     */
195    @Snippet
196    public static void arraycopyObject(Object[] src, int srcPos, Object[] dest, int destPos, int length) {
197        Kind kind = Kind.Object;
198        final int scale = arrayIndexScale(kind);
199        int arrayBaseOffset = arrayBaseOffset(kind);
200        LocationIdentity arrayLocation = getArrayLocation(kind);
201        if (src == dest && srcPos < destPos) { // bad aliased case
202            long start = (long) (length - 1) * scale;
203            for (long i = start; i >= 0; i -= scale) {
204                Object a = UnsafeLoadNode.load(src, arrayBaseOffset + i + (long) srcPos * scale, kind, arrayLocation);
205                DirectObjectStoreNode.storeObject(dest, arrayBaseOffset, i + (long) destPos * scale, a, getArrayLocation(kind), kind);
206            }
207        } else {
208            long end = (long) length * scale;
209            for (long i = 0; i < end; i += scale) {
210                Object a = UnsafeLoadNode.load(src, arrayBaseOffset + i + (long) srcPos * scale, kind, arrayLocation);
211                DirectObjectStoreNode.storeObject(dest, arrayBaseOffset, i + (long) destPos * scale, a, getArrayLocation(kind), kind);
212            }
213        }
214    }
215
    /**
     * Copies {@code length} elements between primitive arrays whose element kind is not known
     * at snippet-instantiation time. The element size and array header size are decoded from
     * the {@code layoutHelper} value; src and dest are assumed to share the same layout
     * (same element size and header) — TODO confirm against the caller.
     *
     * NOTE(review): unlike {@link #vectorizedCopy}, this snippet has no backward-copy path,
     * so it presumably is only used when a forward copy is safe — verify at the call site.
     *
     * @param src source primitive array
     * @param srcPos starting element index in {@code src}
     * @param dest destination primitive array
     * @param destPos starting element index in {@code dest}
     * @param length number of elements to copy
     * @param layoutHelper encoded array layout (element size and header size)
     */
    @Snippet
    public static void arraycopyPrimitive(Object src, int srcPos, Object dest, int destPos, int length, int layoutHelper) {
        // Decode log2(element size) and the array header size from the layout helper word.
        int log2ElementSize = (layoutHelper >> layoutHelperLog2ElementSizeShift()) & layoutHelperLog2ElementSizeMask();
        int headerSize = (layoutHelper >> layoutHelperHeaderSizeShift()) & layoutHelperHeaderSizeMask();

        // All offsets below are absolute byte offsets from the array base (header included).
        Unsigned vectorSize = Word.unsigned(VECTOR_SIZE);
        Unsigned srcOffset = Word.unsigned(srcPos).shiftLeft(log2ElementSize).add(headerSize);
        Unsigned destOffset = Word.unsigned(destPos).shiftLeft(log2ElementSize).add(headerSize);
        Unsigned destStart = destOffset;
        Unsigned destEnd = destOffset.add(Word.unsigned(length).shiftLeft(log2ElementSize));

        Unsigned destVectorEnd = null;
        Unsigned nonVectorBytes = null;
        Unsigned sizeInBytes = Word.unsigned(length).shiftLeft(log2ElementSize);
        if (supportsUnalignedMemoryAccess) {
            // Unaligned accesses are allowed: copy vectors for the whole range, leaving only
            // the sub-vector remainder for the byte-wise pre-loop.
            nonVectorBytes = sizeInBytes.unsignedRemainder(vectorSize);
            destVectorEnd = destEnd;
        } else {
            // "In phase" means src and dest have the same alignment modulo VECTOR_SIZE, so a
            // single byte-wise prefix can align both sides at once.
            boolean inPhase = srcOffset.and((int) VECTOR_SIZE - 1).equal(destOffset.and((int) VECTOR_SIZE - 1));
            boolean hasAtLeastOneVector = sizeInBytes.aboveOrEqual(vectorSize);
            // We must have at least one full vector, otherwise we must copy each byte separately
            if (hasAtLeastOneVector && inPhase) { // If in phase, we can vectorize
                nonVectorBytes = vectorSize.subtract(destStart.unsignedRemainder(vectorSize));
            } else { // fallback is byte-wise
                nonVectorBytes = sizeInBytes;
            }
            destVectorEnd = destEnd.subtract(destEnd.unsignedRemainder(vectorSize));
        }

        // Byte-wise pre-loop: copies up to the first vector boundary (or the entire range in
        // the byte-wise fallback, where nonVectorBytes == sizeInBytes).
        Unsigned destNonVectorEnd = destStart.add(nonVectorBytes);
        while (destOffset.belowThan(destNonVectorEnd)) {
            ObjectAccess.writeByte(dest, destOffset, ObjectAccess.readByte(src, srcOffset, any()), any());
            destOffset = destOffset.add(1);
            srcOffset = srcOffset.add(1);
        }
        // Word-wise main loop.
        // NOTE(review): strides by wordSize() while the bounds are derived from VECTOR_SIZE;
        // this assumes wordSize() == VECTOR_SIZE — TODO confirm for 32-bit targets.
        while (destOffset.belowThan(destVectorEnd)) {
            ObjectAccess.writeWord(dest, destOffset, ObjectAccess.readWord(src, srcOffset, any()), any());
            destOffset = destOffset.add(wordSize());
            srcOffset = srcOffset.add(wordSize());
        }
        // Do the last bytes each when it is required to have absolute alignment.
        while (!supportsUnalignedMemoryAccess && destOffset.belowThan(destEnd)) {
            ObjectAccess.writeByte(dest, destOffset, ObjectAccess.readByte(src, srcOffset, any()), any());
            destOffset = destOffset.add(1);
            srcOffset = srcOffset.add(1);
        }
    }
264
265    public static class Templates extends AbstractTemplates {
266
267        private final SnippetInfo[] arraycopySnippets;
268        private final SnippetInfo genericPrimitiveSnippet;
269
270        public Templates(HotSpotProviders providers, TargetDescription target) {
271            super(providers, providers.getSnippetReflection(), target);
272
273            arraycopySnippets = new SnippetInfo[Kind.values().length];
274            arraycopySnippets[Kind.Boolean.ordinal()] = snippet(UnsafeArrayCopySnippets.class, "arraycopyBoolean");
275            arraycopySnippets[Kind.Byte.ordinal()] = snippet(UnsafeArrayCopySnippets.class, "arraycopyByte");
276            arraycopySnippets[Kind.Short.ordinal()] = snippet(UnsafeArrayCopySnippets.class, "arraycopyShort");
277            arraycopySnippets[Kind.Char.ordinal()] = snippet(UnsafeArrayCopySnippets.class, "arraycopyChar");
278            arraycopySnippets[Kind.Int.ordinal()] = snippet(UnsafeArrayCopySnippets.class, "arraycopyInt");
279            arraycopySnippets[Kind.Long.ordinal()] = snippet(UnsafeArrayCopySnippets.class, "arraycopyLong");
280            arraycopySnippets[Kind.Float.ordinal()] = snippet(UnsafeArrayCopySnippets.class, "arraycopyFloat");
281            arraycopySnippets[Kind.Double.ordinal()] = snippet(UnsafeArrayCopySnippets.class, "arraycopyDouble");
282            arraycopySnippets[Kind.Object.ordinal()] = snippet(UnsafeArrayCopySnippets.class, "arraycopyObject");
283
284            genericPrimitiveSnippet = snippet(UnsafeArrayCopySnippets.class, "arraycopyPrimitive");
285        }
286
287        public void lower(UnsafeArrayCopyNode node, LoweringTool tool) {
288            Kind elementKind = node.getElementKind();
289            SnippetInfo snippet;
290            if (elementKind == null) {
291                // primitive array of unknown kind
292                snippet = genericPrimitiveSnippet;
293            } else {
294                snippet = arraycopySnippets[elementKind.ordinal()];
295                assert snippet != null : "arraycopy snippet for " + elementKind.name() + " not found";
296            }
297
298            Arguments args = new Arguments(snippet, node.graph().getGuardsStage(), tool.getLoweringStage());
299            node.addSnippetArguments(args);
300
301            SnippetTemplate template = template(args);
302            template.instantiate(providers.getMetaAccess(), node, DEFAULT_REPLACER, args);
303        }
304    }
305}