Raspberry Pi
category: code [glöplog]
Apparently they removed the one-per-person restriction and are assembling/selling 4000 a day, and if you order one today you can expect it in... October :(
I released a prod (with source code) at Solskogen: http://pouet.net/prod.php?which=59518
Tiny case of tl;dr, but if anyone is ordering, get yours from the Farnell/Element14 distributor and not RS: you're looking at a 4-5 week lead time instead of 11...
..and you'll get an element14/RPi T-Shirt for free ;-)
A small Raspberry Pi demo was released at Edison as well. http://pouet.net/prod.php?which=59520
Video or it didn't happen
Quote:
Raspian OS
Yes, another linux distri.. Exactly what the world was waiting for.
WO HO mine otw :D !!
I bet someone here has a short piece of float to half float code I can copy-paste... right?
Here you go. Untested, but you'll get the idea...
Header:
cpp:
Header:
Code:
#include <stdint.h>
//"Half" float value conversion routines.
//Based on this: http://stackoverflow.com/questions/1659440/32-bit-to-16-bit-floating-point-conversion
// Storage type for the raw 16-bit pattern of a half-precision float.
typedef uint16_t Half;
// Converts between 32-bit float and 16-bit half-float bit patterns.
// The class has no instance state: it exposes two static conversion
// functions and keeps the bit-pattern constants they rely on private.
// NOTE(review): toHalf/toFloat are declared inline here while this listing
// defines them in a separate .cpp file. An inline function has to be defined
// in every translation unit that uses it, so callers outside that .cpp will
// likely fail to link - confirm, then either move the definitions into the
// header or drop inline from both the declarations and the definitions.
class FastHalfCompressor
{
// Views the same 32 bits as a float, a signed integer or an unsigned integer.
union Bits {
float f;
int32_t si;
uint32_t ui;
};
// Shift distance between the float layout and the down-shifted layout
// (toHalf shifts right by it, toFloat shifts left by it).
static int const shift = 13;
// Shift distance between the float sign bit (bit 31) and the half sign bit (bit 15).
static int const shiftSign = 16;
// Constants ending in N are full 32-bit float bit patterns.
static int32_t const infN = 0x7F800000; // flt32 infinity
static int32_t const maxN = 0x477FE000; // max flt16 normal as a flt32
static int32_t const minN = 0x38800000; // min flt16 normal as a flt32
static int32_t const signN = 0x80000000; // flt32 sign bit
// Constants ending in C are the matching patterns after shifting right by `shift`.
static int32_t const infC = infN >> shift;
static int32_t const nanN = (infC + 1) << shift; // minimum flt16 nan as a flt32
static int32_t const maxC = maxN >> shift;
static int32_t const minC = minN >> shift;
// NOTE(review): signN does not fit a positive int32_t, so it is typically
// stored as a negative value and this right shift is then
// implementation-defined; with an arithmetic shift the result is 0xFFFF8000
// rather than 0x8000. toFloat only ANDs it with a 16-bit input - confirm
// this is acceptable on the target compilers.
static int32_t const signC = signN >> shiftSign; // flt16 sign bit
// Float bit patterns used as multipliers on the subnormal paths
// (mulN in toHalf, mulC in toFloat).
static int32_t const mulN = 0x52000000; // (1 << 23) / minN
static int32_t const mulC = 0x33800000; // minN / (1 << (23 - shift))
static int32_t const subC = 0x003FF; // max flt32 subnormal down shifted
static int32_t const norC = 0x00400; // min flt32 normal down shifted
// Offsets subtracted in toHalf and added back in toFloat:
// maxD is applied to down-shifted values above maxC, minD to values above subC.
static int32_t const maxD = infC - maxC - 1;
static int32_t const minD = minC - subC - 1;
public:
/*!
Convert float value to half-float value (GL_HALF / IEEE 754).
\param value 32-bit float to convert.
\return The half-float bit pattern as a 16-bit unsigned integer.
*/
static inline Half toHalf(const float & value);
/*!
Convert half-float value to float value (GL_HALF / IEEE 754).
\param value Half-float bit pattern to convert.
\return The value as a 32-bit float.
*/
static inline float toFloat(const Half & value);
};
// Compresses floats from a caller-chosen range [min, max] into a smaller
// integer code by dropping low mantissa bits and removing the gaps between
// zero, +/-epsilon and the range limits. All configuration is derived once
// in the constructor and stored as float bit patterns or compressed codes.
class PreciseHalfCompressor
{
// Views the same 32 bits as a float, a signed integer or an unsigned integer.
union Bits {
float f;
int32_t si;
uint32_t ui;
};
// True when the bit pattern of `min` is negative as a signed integer.
bool hasNegatives;
// True when no mantissa bits are dropped (_shift == 0).
bool noLoss;
// Bit patterns of the constructor's max, min and epsilon arguments.
int32_t _maxF;
int32_t _minF;
int32_t _epsF;
// Compressed-domain code for max, and the code that represents zero.
int32_t _maxC;
int32_t _zeroC;
// Offsets subtracted by toHalf and added back by toFloat:
// _pDelta for codes above _zeroC, _nDelta for codes above _maxC.
int32_t _pDelta;
int32_t _nDelta;
// Number of low mantissa bits dropped: 23 - precision.
int _shift;
// Float sign bit, and the mask that keeps everything except the sign bit.
static int32_t const signF = 0x80000000;
static int32_t const absF = ~signF;
public:
// Builds a compressor for values in [min, max]. The constructor body lists
// the legal arguments as: min <= 0 < epsilon < max, 0 <= precision <= 23;
// it does not validate them.
PreciseHalfCompressor(float min, float epsilon, float max, int precision);
// Limits value to the configured range and returns zero for magnitudes below epsilon.
float clamp(float value);
// Clamps value and returns its compressed code.
uint32_t toHalf(float value);
// Expands a code produced by toHalf back into a float.
float toFloat(uint32_t value);
};
/* Table-based approach.
 *
 * These macros implement a finite iterator useful to build lookup
 * tables. For instance, S64(0) will call S1(x) for all values of x
 * between 0 and 63.
 * Due to the exponential behaviour of the calls, the stress on the
 * compiler may be important.
 *
 * Note: C and C++ block comments do not nest, so this section must never be
 * wrapped in another block comment; use #if 0 / #endif to disable it. */
#define S4(x)    S1((x)),   S1((x)+1),     S1((x)+2),     S1((x)+3)
#define S16(x)   S4((x)),   S4((x)+4),     S4((x)+8),     S4((x)+12)
#define S64(x)   S16((x)),  S16((x)+16),   S16((x)+32),   S16((x)+48)
#define S256(x)  S64((x)),  S64((x)+64),   S64((x)+128),  S64((x)+192)
#define S1024(x) S256((x)), S256((x)+256), S256((x)+512), S256((x)+768)

/* Lookup table-based algorithm from "Fast Half Float Conversions"
 * by Jeroen van der Zijp, November 2008. No rounding is performed,
 * and some NaN values may be incorrectly converted to Inf.
 *
 * x is the raw 32-bit pattern of a float; the return value is the raw
 * 16-bit pattern of the corresponding half-float. Both tables are indexed
 * by the top nine bits of x (sign and exponent), so entries 0..255 cover
 * positive inputs and entries 256..511 cover negative inputs. */
static inline uint16_t float_to_half_nobranch(uint32_t x)
{
    /* Sign and exponent bits of the result for each sign/exponent of x. */
    static uint16_t const basetable[512] =
    {
#define S1(i) (((i) < 103) ? 0x0000 : \
               ((i) < 113) ? 0x0400 >> (113 - (i)) : \
               ((i) < 143) ? ((i) - 112) << 10 : 0x7c00)
        S256(0),
#undef S1
#define S1(i) (0x8000 | (((i) < 103) ? 0x0000 : \
                         ((i) < 113) ? 0x0400 >> (113 - (i)) : \
                         ((i) < 143) ? ((i) - 112) << 10 : 0x7c00))
        S256(0),
#undef S1
    };

    /* Right shift applied to the 23-bit mantissa of x; a shift of 24
     * discards the mantissa entirely. */
    static uint8_t const shifttable[512] =
    {
#define S1(i) (((i) < 103) ? 24 : \
               ((i) < 113) ? 126 - (i) : \
               ((i) < 143 || (i) == 255) ? 13 : 24)
        S256(0), S256(0),
#undef S1
    };

    uint16_t bits = basetable[(x >> 23) & 0x1ff];
    bits |= (x & 0x007fffff) >> shifttable[(x >> 23) & 0x1ff];
    return bits;
}
/* This method is faster than the OpenEXR implementation (very often
 * used, eg. in Ogre), with the additional benefit of rounding, inspired
 * by James Tursa's half-precision code.
 *
 * x is the raw 32-bit pattern of a float; the return value is the raw
 * 16-bit pattern of the corresponding half-float.
 *
 * Note: C and C++ block comments do not nest, so this function must never be
 * wrapped in another block comment; use #if 0 / #endif to disable it. */
static inline uint16_t float_to_half_branch(uint32_t x)
{
    uint16_t bits = (x >> 16) & 0x8000; /* Get the sign */
    uint16_t m = (x >> 12) & 0x07ff;    /* Keep one extra bit for rounding */
    unsigned int e = (x >> 23) & 0xff;  /* Using int is faster here */

    /* If zero, or denormal, or exponent underflows too much for a denormal
     * half, return signed zero. */
    if (e < 103)
        return bits;

    /* If NaN, return NaN. If Inf or exponent overflow, return Inf. */
    if (e > 142)
    {
        bits |= 0x7c00u;
        /* If exponent was 0xff and one mantissa bit was set, it means NaN,
         * not Inf, so make sure we set one mantissa bit too. */
        bits |= (e == 255 && (x & 0x007fffffu));
        return bits;
    }

    /* If exponent underflows but not too much, return a denormal */
    if (e < 113)
    {
        m |= 0x0800u; /* restore the implicit leading one */
        /* Extra rounding may overflow and set mantissa to 0 and exponent
         * to 1, which is OK. */
        bits |= (m >> (114 - e)) + ((m >> (113 - e)) & 1);
        return bits;
    }

    bits |= ((e - 112) << 10) | (m >> 1);
    /* Extra rounding. An overflow will set mantissa to 0 and increment
     * the exponent, which is OK. */
    bits += m & 1;
    return bits;
}
cpp:
Code:
#include "half.h"
/*!
Convert float value to half-float value (GL_HALF / IEEE 754).

Works on the raw bit pattern without branches: every step computes a
candidate value and merges it in with an all-ones / all-zero mask.
\param value 32-bit float to convert.
\return The half-float bit pattern.
*/
inline Half FastHalfCompressor::toHalf(const float & value)
{
    Bits v, s, t;
    v.f = value;
    uint32_t sign = v.si & signN; // isolate the sign bit
    v.si ^= sign;                 // continue with the magnitude only
    sign >>= shiftSign;           // logical shift

    // Only magnitudes below minN take the subnormal path. The multiplicand is
    // zeroed for every other input so that the float -> integer conversion
    // below always stays in range: converting an out-of-range or NaN float
    // to an integer is undefined behaviour, even if the result is discarded.
    const int32_t isSubnormal = -(minN > v.si);
    t.si = v.si & isSubnormal;
    s.si = mulN;
    s.si = (int32_t)(s.f * t.f);          // correct subnormals
    v.si ^= (s.si ^ v.si) & isSubnormal;  // select the rescaled value

    // Finite values above the largest half normal become infinity.
    v.si ^= (infN ^ v.si) & -((infN > v.si) & (v.si > maxN));
    // NaN patterns between infN and nanN are raised to nanN.
    v.si ^= (nanN ^ v.si) & -((nanN > v.si) & (v.si > infN));

    v.ui >>= shift; // logical shift
    // Remove the offsets between the down-shifted float layout and the half layout.
    v.si ^= ((v.si - maxD) ^ v.si) & -(v.si > maxC);
    v.si ^= ((v.si - minD) ^ v.si) & -(v.si > subC);
    return v.ui | sign;
}
inline float FastHalfCompressor::toFloat(const Half & value)
{
Bits v;
v.ui = value;
int32_t sign = v.si & signC;
v.si ^= sign;
sign <<= shiftSign;
v.si ^= ((v.si + minD) ^ v.si) & -(v.si > subC);
v.si ^= ((v.si + maxD) ^ v.si) & -(v.si > maxC);
Bits s;
s.si = mulC;
s.f *= v.si;
int32_t mask = -(norC > v.si);
v.si <<= shift;
v.si ^= (s.si ^ v.si) & mask;
v.si |= sign;
return v.f;
}
// Derives all conversion parameters from the requested range.
// Expected arguments (not validated here):
//   min <= 0 < epsilon < max
//   0 <= precision <= 23
PreciseHalfCompressor::PreciseHalfCompressor(float min, float epsilon, float max, int precision)
{
    // Capture the raw bit patterns of the three range parameters.
    Bits bits;
    bits.f = min;
    _minF = bits.si;
    bits.f = epsilon;
    _epsF = bits.si;
    bits.f = max;
    _maxF = bits.si;

    _shift = 23 - precision;
    noLoss = (_shift == 0);
    hasNegatives = (_minF < 0);

    // Codes of +epsilon and -epsilon in the compressed domain.
    int32_t posEps;
    int32_t negEps;
    if (!noLoss) {
        // Lossy mode: codes are the bit patterns shifted right by _shift.
        posEps = static_cast<uint32_t>(_epsF) >> _shift;
        negEps = static_cast<uint32_t>(_epsF ^ signF) >> _shift;
        _maxC = static_cast<uint32_t>(_maxF) >> _shift;
        _zeroC = 0;
    } else {
        // Lossless mode: codes are the bit patterns with the sign bit flipped.
        posEps = _epsF ^ signF;
        negEps = _epsF;
        _maxC = _maxF ^ signF;
        _zeroC = signF;
    }

    _nDelta = negEps - _maxC - 1;
    _pDelta = posEps - _zeroC - 1;
}
float PreciseHalfCompressor::clamp(float value)
{
Bits v;
v.f = value;
int32_t max = _maxF;
if(hasNegatives)
max ^= (_minF ^ _maxF) & -(0 > v.si);
v.si ^= (max ^ v.si) & -(v.si > max);
v.si &= -(_epsF <= (v.si & absF));
return v.f;
}
uint32_t PreciseHalfCompressor::toHalf(float value)
{
Bits v;
v.f = clamp(value);
if(noLoss)
v.si ^= signF;
else
v.ui >>= _shift;
if(hasNegatives)
v.si ^= ((v.si - _nDelta) ^ v.si) & -(v.si > _maxC);
v.si ^= ((v.si - _pDelta) ^ v.si) & -(v.si > _zeroC);
if(noLoss)
v.si ^= signF;
return v.ui;
}
float PreciseHalfCompressor::toFloat(uint32_t value)
{
Bits v;
v.ui = value;
if(noLoss)
v.si ^= signF;
v.si ^= ((v.si + _pDelta) ^ v.si) & -(v.si > _zeroC);
if(hasNegatives)
v.si ^= ((v.si + _nDelta) ^ v.si) & -(v.si > _maxC);
if(noLoss)
v.si ^= signF;
else
v.si <<= _shift;
return v.f;
}
Half-floats are built-in to (ARM) GCC. Just use __fp16 type:
compile with:
arm-none-linux-gnueabi-gcc -mfp16-format=ieee test.c -o test
assuming suitably-recent GCC. It'll use hardware instructions with a suitable VFP variant (of which the r-pi's isn't one), otherwise software helpers.
Code:
#include <math.h>
/* Array of 100 elements stored in the compiler-provided __fp16 type. */
__fp16 myarr[100];
/* Multiplies elements 0 and 3 of myarr and returns the product as a float.
 * Marked noinline so the call is not folded into main - presumably to keep
 * the generated half-float code easy to inspect; confirm. */
float __attribute__((noinline)) foo (void)
{
return myarr[0] * myarr[3];
}
/* Stores 1.0 and 3.0 into the two elements foo() reads, then derives the
 * exit status from foo()'s result. */
int main (void)
{
myarr[0] = 1.0;
myarr[3] = 3.0;
/* The cast binds to foo() only: (int)foo() is evaluated first, 3.0 is then
 * subtracted as a double, and the difference is converted to the int
 * return value. */
return (int) foo () - 3.0;
}
compile with:
arm-none-linux-gnueabi-gcc -mfp16-format=ieee test.c -o test
assuming suitably-recent GCC. It'll use hardware instructions with a suitable VFP variant (of which the r-pi's isn't one), otherwise software helpers.
ahaha @torus, you're totally right, they can't focus on one distrib, always making their own shit
Yeah, fuck those Linux developers who just go ahead and code stuff that's better than what currently exists, instead of sitting around on standardisation committees!!!!!
Did anyone try to use glDiscardFramebufferEXT? I get a segfault when I try it. Am I doing something wrong or is it the driver's fault?
Just ordered mine.
gasman: having a gazillion window systems and making the user choose on install sure helps boost adoption among non-Linux users.
And having a recompiled distro which finally uses an ABI which fully supports the FPU is totally useless, integer float emulation is so much more fun.
looks like a nice media-station for 73 bucks: http://dx.com/p/ak802-mini-android-4-0-network-media-player-w-wi-fi-hdmi-tf-usb-black-4gb-1gb-ddr-iii-143431
Android 4.0
Chipset Allwinner A10; Cortex A8
Processor Frequency 1.5GHz
RAM 1GB DDR III
Internal Memory Built-in 4GB NAND flash memory
Memory Card Type Supports TF card up to 32GB
Expansion Slot USB 2.0
External Storage Supports external HDD up to 2TB
Audio Format AAC, AAC+, eAAC+, AMR-NB, AMR-WB, QCP, MP3, WMA, WAV, MIDI, M4A
Video Format WMV, ASF, MP4, 3GP, 3G2M4V, AVI, MJPEG, RV10, DivX, VC-1, MPEG-2, MPEG-4, H.263, H.264, 1280*720P HD 30 fps, 1080P, 720*480 D1 30fps
Output Resolution 2160p
Android 4.0
Chipset Allwinner A10; Cortex A8
Processor Frequency 1.5GHz
RAM 1GB DDR III
Internal Memory Built-in 4GB NAND flash memory
Memory Card Type Supports TF card up to 32GB
Expansion Slot USB 2.0
External Storage Supports external HDD up to 2TB
Audio Format AAC, AAC+, eAAC+, AMR-NB, AMR-WB, QCP, MP3, WMA, WAV, MIDI, M4A
Video Format WMV, ASF, MP4, 3GP, 3G2M4V, AVI, MJPEG, RV10, DivX, VC-1, MPEG-2, MPEG-4, H.263, H.264, 1280*720P HD 30 fps, 1080P, 720*480 D1 30fps
Output Resolution 2160p
Quote:
And having a recompiled distro which finally uses an ABI which fully supports the FPU is totally useless, integer float emulation is so much more fun.
If only linux was open source! Then they could have simply updated the original distro and saved all of the confusion and fragmentation.
psonice: Yeah. Then they could have given it a name that clearly indicates "this is the updated version of Debian that you should use with your Raspberry Pi", like, say, Raspbian or something, and the Raspberry Pi people could link to it on their downloads page and not even have to mention the old Debian release at all any more. Yes, that would be a good idea.
(Or do you mean that they should merge this into core Debian itself, so that people running mission-critical web servers and shit get a nice shiny upgrade full of experimental code that's only a few days old? Yeah, can't see any problems with that.)
(Or do you mean that they should merge this into core Debian itself, so that people running mission-critical web servers and shit get a nice shiny upgrade full of experimental code that's only a few days old? Yeah, can't see any problems with that.)
Pick which belongs to the Demoscene, and which to the Linux distro-making "movement":
[ ] time wasting
[ ] apparently pointless
[ ] arguably contributing to art
[ ] arguably contributing to technology
[ ] full of overzealous people
:)
[ ] time wasting
[ ] apparently pointless
[ ] arguably contributing to art
[ ] arguably contributing to technology
[ ] full of overzealous people
:)
[X] time wasting
[X] boobs
[x] coobs