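// Single-precision floating-point reciprocal square root (work in progress).
// Written in Verilog.
// q = 1/sqrt(x)
// Leftover scratch/debug lines have not been cleaned up.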
//2024 04 27 ..05 18
`timescale 1ns/1ps
`include "float.v"
// Testbench for InvSQRT.
// Drives a fixed list of single-precision bit patterns into the DUT, dumps the
// testbench-level signals to InvSQRT.vcd and prints x/q just before each new
// vector is applied (i.e. after the previous vector has been held).
module tb;
    // Basic delay unit (in `timescale units). clk toggles once per STEP, so a
    // full clock period is 2*STEP.
    parameter STEP = 10;

    reg         clk;
    reg  [31:0] x;      // operand bit pattern driven into the DUT
    wire [31:0] ax;     // DUT debug output
    wire [31:0] tblx;   // DUT debug output (table value)
    wire [31:0] tx;     // DUT debug output (refined value before exponent fix-up)
    wire [8:0]  e;      // DUT debug output
    wire [7:0]  oute;   // DUT debug output (result exponent field)
    wire [31:0] q;      // DUT result bit pattern

    InvSQRT dut(clk, x, ax, tblx, tx, e, oute, q);

    initial begin
        $dumpfile("InvSQRT.vcd");
        $dumpvars(1, tb);
    end

    // Clock generator: low until 2*STEP, then 1200 toggles spaced STEP apart.
    // The simulation ends when this block runs out of events.
    initial begin
        clk = 1'b0;
        #STEP;
        repeat (1200) begin
            #STEP clk = ~clk;
        end
    end

    // Hold the current vector for hold_steps*STEP, report the settled result,
    // then apply the next vector.
    task next_vector;
        input integer hold_steps;
        input [31:0]  value;
        begin
            #(STEP * hold_steps);
            $display("t=%0.1f  x=%h  q=%h", $realtime, x, q);
            x = value;
        end
    endtask

    // Stimulus. The original changed x at exact multiples of STEP, i.e. at the
    // same simulation time as clk edges (t=260 was a rising edge), which races
    // with any clocked logic inside the DUT. A half-STEP skew keeps every
    // change of x strictly between clk edges; hold times are otherwise the same.
    initial begin
        x = 32'h00000001;               // smallest denormal
        #(STEP / 2.0);
        next_vector(13, 32'h38000000);  // 2^-15
        next_vector(13, 32'h3E400000);  // 0.1875
        next_vector(13, 32'h3E800000);  // 0.25
        next_vector(14, 32'h3EA00000);  // 0.3125
        next_vector(14, 32'h3F000000);  // 0.5
        next_vector(14, 32'h3F400000);  // 0.75
        next_vector(14, 32'h3f800000);  // 1.0
        next_vector(14, 32'h3f900000);  // 1.125
        next_vector(14, 32'h3fc00000);  // 1.5
        next_vector(14, 32'h40000000);  // 2.0
        next_vector(14, 32'h40400000);  // 3.0
        next_vector(14, 32'h40800000);  // 4.0
        next_vector(14, 32'h40A00000);  // 5.0
        next_vector(14, 32'h40f00000);  // 7.5
        next_vector(14, 32'h41000000);  // 8.0
        next_vector(14, 32'h41100000);  // 9.0
        next_vector(14, 32'h42000000);  // 32.0
        next_vector(14, 32'h4fffffff);  // just below 2^33
        next_vector(14, 32'h7effffff);  // just below 2^127
        next_vector(13, 32'h01000000);  // 2^-125
        // Let the last vector settle like the others before reporting it.
        #(STEP * 14);
        $display("t=%0.1f  x=%h  q=%h", $realtime, x, q);
    end
endmodule
// The sign bit x[31] is not handled: do not feed negative inputs.
// Field layout of x: sgn = x[31], exp[7:0] = x[30:23], man[22:0] = x[22:0]
// InvSQRT: single-precision reciprocal square root, q = 1/sqrt(x).
//
// Method
//   1. Split x into sign / exponent / mantissa.
//   2. Rebuild the operand as ix in [1,4): keep the mantissa and replace the
//      exponent field by 127 or 128 so that (exp - ie) is always even.
//   3. Look up a first guess in InvSQRTtable and refine it with Nstage
//      invsqrt_stage iterations, giving tx ~= 1/sqrt(ix).
//   4. Put the removed power of two back: since x = ix * 2^(exp-ie),
//      1/sqrt(x) = tx * 2^(-(exp-ie)/2), i.e. subtract (exp-ie)/2 from the
//      exponent field of tx.
//
// Ports
//   clk   clock forwarded to the refinement stages
//   x     operand (IEEE-754 binary32 bit pattern)
//   q     result bit pattern
//   ax, tblx, tx, e, oute   debug taps: range-reduced operand, table value,
//         refined value before exponent fix-up, unbiased exponent (9-bit
//         two's complement), and the exponent field of q.
//
// Limitations
//   * The sign bit is copied straight to q; negative operands are not
//     supported.
//   * exp == 0 (zero; denormals are treated as zero) returns infinity,
//     exp == 255 returns zero for infinity and a quiet NaN for NaN.
//   * NOTE(review): invsqrt_stage is clocked while x/ix reach every stage
//     unregistered, so x presumably has to be held until tx settles (the
//     testbench holds each vector for about 7 clock periods) - confirm
//     against the latency of the float.v primitives.
module InvSQRT( input clk, input [31:0] x, output [31:0] ax,
                output [31:0] tblx, output [31:0] tx, output [8:0] e, output [7:0] oute,
                output [31:0] q);

    // Number of Newton-Raphson refinement stages chained after the table.
    parameter Nstage = 2;

    // ---- field extraction -------------------------------------------------
    wire        sgn = x[31];
    wire [7:0]  exp = x[30:23];
    wire [22:0] man = x[22:0];

    // Unbiased exponent as 9-bit two's complement.
    assign e[8:0] = {1'b0, exp[7:0]} - 9'd127;

    // ---- range reduction to [1,4) ----------------------------------------
    // An IEEE mantissa only spans [1,2). To let the table cover [1,4) the
    // parity of the unbiased exponent (e[0]) is folded into the operand:
    // e[0]=0 -> ix in [1,2), e[0]=1 -> ix in [2,4).
    wire [7:0]  ie = (e[0] ? 8'd128 : 8'd127);
    wire [31:0] ix = {sgn, ie, man};

    // Operand as seen by the table; the table only reads bit 23 (= e[0]) and
    // the upper mantissa bits. (The original built this from a 7-bit
    // concatenation assigned to an 8-bit wire; the width is now explicit.)
    wire [31:0] aligned_x = {sgn, 7'b0, e[0], man[22:0]};

    assign ax = ix;

    // ---- first guess + refinement ----------------------------------------
    wire [31:0] r;
    wire [31:0] X[Nstage:0];

    InvSQRTtable tbl( .x(aligned_x), .val(r));   // look-up table for Xo
    assign X[0]       = r[31:0];
    assign tblx[31:0] = r[31:0];

    generate
        genvar k;
        for (k = 0; k < Nstage; k = k + 1) begin : invsqrt_stage_gen
            invsqrt_stage stg( .clk(clk), .x(ix), .Xo(X[k]), .out(X[k+1]));
        end
    endgenerate

    assign tx = X[Nstage];

    // ---- exponent fix-up ---------------------------------------------------
    // d = exp - ie in 9-bit two's complement (range -128..+128, always even).
    // The original did this in 8 bits via |a-b|, which wrapped for exp == 0
    // and applied the correction with the wrong sign. d[8:1] is d/2 as an
    // 8-bit two's-complement number; for exp in 1..254 the result is
    // bit-identical to the original expression.
    wire [8:0] d        = {1'b0, exp} - {1'b0, ie};
    wire [7:0] half_d   = d[8:1];
    wire [7:0] norm_exp = tx[30:23] - half_d;

    // ---- special operands --------------------------------------------------
    wire exp_all_zero = (exp == 8'h00);                 // zero / denormal
    wire exp_all_ones = (exp == 8'hFF);                 // infinity / NaN
    wire is_nan       = exp_all_ones && (man != 23'd0);
    wire is_inf       = exp_all_ones && (man == 23'd0);

    // zero -> inf, inf -> zero, NaN -> quiet NaN, otherwise the computed value.
    assign oute = (exp_all_zero || is_nan) ? 8'hFF :
                  is_inf                   ? 8'h00 :
                                             norm_exp;

    wire [22:0] om = is_nan                   ? 23'h400000 :
                     (exp_all_zero || is_inf) ? 23'h000000 :
                                                tx[22:0];

    assign q = {sgn, oute[7:0], om[22:0]};
endmodule
// Newton-Raphson step for q = 1/sqrt(x):
//   Xn+1 = 0.5 * Xn * (3 - x*Xn^2)
//        = Xn * (1.5 - 0.5*x*Xn^2)
// One Newton-Raphson refinement step for 1/sqrt(x):
//
//     out = 0.5 * Xo * (3 - x * Xo * Xo)
//
//   clk  clock passed to every fl_multa instance
//   x    operand whose reciprocal square root is being refined
//   Xo   current estimate of 1/sqrt(x)
//   out  improved estimate
//
// fl_multa and fl_absba come from float.v.
// NOTE(review): fl_multa takes clk and is presumably registered, while
// fl_absba has no clock connection here - confirm the latencies in float.v.
module invsqrt_stage(
    input         clk,
    input  [31:0] x,
    input  [31:0] Xo,
    output [31:0] out
);
    // binary32 constants
    localparam [31:0] FL_THREE = 32'h40400000;   // 3.0
    localparam [31:0] FL_HALF  = 32'h3F000000;   // 0.5

    wire [31:0] xo_sq;        // Xo * Xo
    wire [31:0] x_xo_sq;      // x * Xo * Xo
    wire [31:0] residual;     // 3 - x * Xo * Xo
    wire [31:0] xo_residual;  // Xo * (3 - x * Xo * Xo)

    // Xo^2
    fl_multa m1(.clk(clk), .fl_a(Xo), .fl_b(Xo), .fl_o(xo_sq));

    // x * Xo^2
    fl_multa m2(.clk(clk), .fl_a(x), .fl_b(xo_sq), .fl_o(x_xo_sq));

    // 3 - x * Xo^2   (ope = 1 selects subtraction)
    fl_absba s1(.fl_ia(FL_THREE), .fl_ib(x_xo_sq), .ope(1'b1), .fl_o(residual));

    // Xo * (3 - x * Xo^2)
    fl_multa m3(.clk(clk), .fl_a(Xo), .fl_b(residual), .fl_o(xo_residual));

    // final scaling by 0.5
    fl_multa m4(.clk(clk), .fl_a(FL_HALF), .fl_b(xo_residual), .fl_o(out));
endmodule // invsqrt_stage
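// Initial estimate Xo for q = 1/sqrt(x)
//   operand range  1 <= x < 4   ->   result range  1/2 < q <= 1
//
// The table has 128 entries selected by a 7-bit index:
//   index = { exponent LSB (x[23]), upper 6 mantissa bits (x[22:17]) }
// so [1,2) and [2,4) are each split into 64 intervals.
// Each entry holds Xo = 1/sqrt(xm), where xm = (x[n] + x[n+1]) / 2 is the
// midpoint of the interval (entry 0 is overridden with 1.0 in the code).
// 7-bit 1/sqrt(x) table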
// InvSQRTtable: 128-entry first-guess table for 1/sqrt(x), 1 <= x < 4.
//
//   x    operand bit pattern; only x[23] (exponent LSB) and x[22:17]
//        (upper six mantissa bits) are read
//   val  binary32 bit pattern of the initial estimate
//
// Index = {x[23], x[22:17]}. Entries 0..63 cover [1,2), entries 64..127
// cover [2,4). Each line is annotated with the interval midpoint xm and the
// decimal value of the entry (1/sqrt(xm)).
//
// A plain `case` replaces the original `casex`: no item contains wildcards,
// and with `casex` any X/Z bit in the index silently matched a table entry
// instead of falling through to `default`.
module InvSQRTtable( input [31:0] x, output [31:0] val);

    wire       exp_lsb  = x[23];      // least significant exponent bit
    wire [5:0] snf_uppr = x[22:17];   // upper six bits of the significand
    wire [6:0] e_snf    = {exp_lsb, snf_uppr[5:0]};

    function [31:0] lookTable;
        input [6:0] idx;
        begin
            case (idx)
                // Entry 0: the midpoint value 32'h3f7f017e (0.996117) is
                // replaced by exactly 1.0, as in the original table.
                7'd0   : lookTable = 32'h3f800000; // xm=1.007812
                7'd1   : lookTable = 32'h3f7d0d3e; // xm=1.023438  0.988483
                7'd2   : lookTable = 32'h3f7b2452; // xm=1.039062  0.981023
                7'd3   : lookTable = 32'h3f79464e; // xm=1.054688  0.973729
                7'd4   : lookTable = 32'h3f7772cb; // xm=1.070312  0.966595
                7'd5   : lookTable = 32'h3f75a969; // xm=1.085938  0.959616
                7'd6   : lookTable = 32'h3f73e9cb; // xm=1.101562  0.952786
                7'd7   : lookTable = 32'h3f723399; // xm=1.117188  0.946100
                7'd8   : lookTable = 32'h3f708681; // xm=1.132812  0.939552
                7'd9   : lookTable = 32'h3f6ee232; // xm=1.148438  0.933139
                7'd10  : lookTable = 32'h3f6d4660; // xm=1.164062  0.926855
                7'd11  : lookTable = 32'h3f6bb2c5; // xm=1.179688  0.920697
                7'd12  : lookTable = 32'h3f6a271a; // xm=1.195312  0.914659
                7'd13  : lookTable = 32'h3f68a31d; // xm=1.210938  0.908739
                7'd14  : lookTable = 32'h3f672691; // xm=1.226562  0.902932
                7'd15  : lookTable = 32'h3f65b139; // xm=1.242188  0.897235
                7'd16  : lookTable = 32'h3f6442db; // xm=1.257812  0.891645
                7'd17  : lookTable = 32'h3f62db41; // xm=1.273438  0.886158
                7'd18  : lookTable = 32'h3f617a36; // xm=1.289062  0.880771
                7'd19  : lookTable = 32'h3f601f87; // xm=1.304688  0.875481
                7'd20  : lookTable = 32'h3f5ecb04; // xm=1.320312  0.870285
                7'd21  : lookTable = 32'h3f5d7c7f; // xm=1.335938  0.865181
                7'd22  : lookTable = 32'h3f5c33cc; // xm=1.351562  0.860165
                7'd23  : lookTable = 32'h3f5af0bf; // xm=1.367188  0.855236
                7'd24  : lookTable = 32'h3f59b330; // xm=1.382812  0.850390
                7'd25  : lookTable = 32'h3f587af7; // xm=1.398438  0.845626
                7'd26  : lookTable = 32'h3f5747ee; // xm=1.414062  0.840941
                7'd27  : lookTable = 32'h3f5619f2; // xm=1.429688  0.836333
                7'd28  : lookTable = 32'h3f54f0df; // xm=1.445312  0.831800
                7'd29  : lookTable = 32'h3f53cc93; // xm=1.460938  0.827340
                7'd30  : lookTable = 32'h3f52acee; // xm=1.476562  0.822951
                7'd31  : lookTable = 32'h3f5191d1; // xm=1.492188  0.818631
                7'd32  : lookTable = 32'h3f507b1d; // xm=1.507812  0.814379
                7'd33  : lookTable = 32'h3f4f68b6; // xm=1.523438  0.810192
                7'd34  : lookTable = 32'h3f4e5a7f; // xm=1.539062  0.806068
                7'd35  : lookTable = 32'h3f4d505e; // xm=1.554688  0.802008
                7'd36  : lookTable = 32'h3f4c4a38; // xm=1.570312  0.798007
                7'd37  : lookTable = 32'h3f4b47f4; // xm=1.585938  0.794067
                7'd38  : lookTable = 32'h3f4a497a; // xm=1.601562  0.790184
                7'd39  : lookTable = 32'h3f494eb3; // xm=1.617188  0.786357
                7'd40  : lookTable = 32'h3f485787; // xm=1.632812  0.782586
                7'd41  : lookTable = 32'h3f4763e2; // xm=1.648438  0.778868
                7'd42  : lookTable = 32'h3f4673ac; // xm=1.664062  0.775203
                7'd43  : lookTable = 32'h3f4586d3; // xm=1.679688  0.771589
                7'd44  : lookTable = 32'h3f449d42; // xm=1.695312  0.768025
                7'd45  : lookTable = 32'h3f43b6e6; // xm=1.710938  0.764510
                7'd46  : lookTable = 32'h3f42d3ad; // xm=1.726562  0.761042
                7'd47  : lookTable = 32'h3f41f383; // xm=1.742188  0.757622
                7'd48  : lookTable = 32'h3f411659; // xm=1.757812  0.754247
                7'd49  : lookTable = 32'h3f403c1c; // xm=1.773438  0.750917
                7'd50  : lookTable = 32'h3f3f64bd; // xm=1.789062  0.747631
                7'd51  : lookTable = 32'h3f3e902c; // xm=1.804688  0.744387
                7'd52  : lookTable = 32'h3f3dbe58; // xm=1.820312  0.741186
                7'd53  : lookTable = 32'h3f3cef34; // xm=1.835938  0.738025
                7'd54  : lookTable = 32'h3f3c22b1; // xm=1.851562  0.734904
                7'd55  : lookTable = 32'h3f3b58c0; // xm=1.867188  0.731823
                7'd56  : lookTable = 32'h3f3a9154; // xm=1.882812  0.728780
                7'd57  : lookTable = 32'h3f39cc60; // xm=1.898438  0.725775
                7'd58  : lookTable = 32'h3f3909d6; // xm=1.914062  0.722806
                7'd59  : lookTable = 32'h3f3849aa; // xm=1.929688  0.719874
                7'd60  : lookTable = 32'h3f378bd0; // xm=1.945312  0.716977
                7'd61  : lookTable = 32'h3f36d03d; // xm=1.960938  0.714115
                7'd62  : lookTable = 32'h3f3616e4; // xm=1.976562  0.711287
                7'd63  : lookTable = 32'h3f355fba; // xm=1.992188  0.708492
                7'd64  : lookTable = 32'h3f3450fc; // xm=2.015625  0.704361
                7'd65  : lookTable = 32'h3f32ef41; // xm=2.046875  0.698963
                7'd66  : lookTable = 32'h3f319589; // xm=2.078125  0.693688
                7'd67  : lookTable = 32'h3f304387; // xm=2.109375  0.688530
                7'd68  : lookTable = 32'h3f2ef8f2; // xm=2.140625  0.683486
                7'd69  : lookTable = 32'h3f2db587; // xm=2.171875  0.678551
                7'd70  : lookTable = 32'h3f2c7904; // xm=2.203125  0.673722
                7'd71  : lookTable = 32'h3f2b432a; // xm=2.234375  0.668994
                7'd72  : lookTable = 32'h3f2a13c0; // xm=2.265625  0.664364
                7'd73  : lookTable = 32'h3f28ea8c; // xm=2.296875  0.659829
                7'd74  : lookTable = 32'h3f27c759; // xm=2.328125  0.655386
                7'd75  : lookTable = 32'h3f26a9f4; // xm=2.359375  0.651031
                7'd76  : lookTable = 32'h3f25922c; // xm=2.390625  0.646762
                7'd77  : lookTable = 32'h3f247fd3; // xm=2.421875  0.642575
                7'd78  : lookTable = 32'h3f2372bd; // xm=2.453125  0.638470
                7'd79  : lookTable = 32'h3f226abe; // xm=2.484375  0.634441
                7'd80  : lookTable = 32'h3f2167af; // xm=2.515625  0.630488
                7'd81  : lookTable = 32'h3f206968; // xm=2.546875  0.626608
                7'd82  : lookTable = 32'h3f1f6fc4; // xm=2.578125  0.622799
                7'd83  : lookTable = 32'h3f1e7aa0; // xm=2.609375  0.619059
                7'd84  : lookTable = 32'h3f1d89d9; // xm=2.640625  0.615385
                7'd85  : lookTable = 32'h3f1c9d4e; // xm=2.671875  0.611775
                7'd86  : lookTable = 32'h3f1bb4e1; // xm=2.703125  0.608229
                7'd87  : lookTable = 32'h3f1ad073; // xm=2.734375  0.604743
                7'd88  : lookTable = 32'h3f19efe6; // xm=2.765625  0.601317
                7'd89  : lookTable = 32'h3f191320; // xm=2.796875  0.597948
                7'd90  : lookTable = 32'h3f183a05; // xm=2.828125  0.594635
                7'd91  : lookTable = 32'h3f17647c; // xm=2.859375  0.591377
                7'd92  : lookTable = 32'h3f16926c; // xm=2.890625  0.588172
                7'd93  : lookTable = 32'h3f15c3bc; // xm=2.921875  0.585018
                7'd94  : lookTable = 32'h3f14f857; // xm=2.953125  0.581914
                7'd95  : lookTable = 32'h3f143026; // xm=2.984375  0.578860
                7'd96  : lookTable = 32'h3f136b13; // xm=3.015625  0.575853
                7'd97  : lookTable = 32'h3f12a90b; // xm=3.046875  0.572892
                7'd98  : lookTable = 32'h3f11e9f9; // xm=3.078125  0.569976
                7'd99  : lookTable = 32'h3f112dca; // xm=3.109375  0.567105
                7'd100 : lookTable = 32'h3f10746d; // xm=3.140625  0.564277
                7'd101 : lookTable = 32'h3f0fbdce; // xm=3.171875  0.561490
                7'd102 : lookTable = 32'h3f0f09dd; // xm=3.203125  0.558744
                7'd103 : lookTable = 32'h3f0e5889; // xm=3.234375  0.556038
                7'd104 : lookTable = 32'h3f0da9c2; // xm=3.265625  0.553372
                7'd105 : lookTable = 32'h3f0cfd79; // xm=3.296875  0.550743
                7'd106 : lookTable = 32'h3f0c539f; // xm=3.328125  0.548151
                7'd107 : lookTable = 32'h3f0bac25; // xm=3.359375  0.545595
                7'd108 : lookTable = 32'h3f0b06fd; // xm=3.390625  0.543075
                7'd109 : lookTable = 32'h3f0a641a; // xm=3.421875  0.540590
                7'd110 : lookTable = 32'h3f09c36d; // xm=3.453125  0.538138
                7'd111 : lookTable = 32'h3f0924ec; // xm=3.484375  0.535720
                7'd112 : lookTable = 32'h3f088889; // xm=3.515625  0.533333
                7'd113 : lookTable = 32'h3f07ee37; // xm=3.546875  0.530979
                7'd114 : lookTable = 32'h3f0755ed; // xm=3.578125  0.528655
                7'd115 : lookTable = 32'h3f06bf9e; // xm=3.609375  0.526361
                7'd116 : lookTable = 32'h3f062b40; // xm=3.640625  0.524097
                7'd117 : lookTable = 32'h3f0598c7; // xm=3.671875  0.521862
                7'd118 : lookTable = 32'h3f05082a; // xm=3.703125  0.519656
                7'd119 : lookTable = 32'h3f04795f; // xm=3.734375  0.517477
                7'd120 : lookTable = 32'h3f03ec5c; // xm=3.765625  0.515325
                7'd121 : lookTable = 32'h3f036117; // xm=3.796875  0.513200
                7'd122 : lookTable = 32'h3f02d788; // xm=3.828125  0.511101
                7'd123 : lookTable = 32'h3f024fa6; // xm=3.859375  0.509028
                7'd124 : lookTable = 32'h3f01c967; // xm=3.890625  0.506979
                7'd125 : lookTable = 32'h3f0144c4; // xm=3.921875  0.504956
                7'd126 : lookTable = 32'h3f00c1b4; // xm=3.953125  0.502956
                7'd127 : lookTable = 32'h3f004030; // xm=3.984375  0.500979
                // Only reachable when the index contains X/Z bits.
                default: lookTable = 32'h3f800000;
            endcase
        end
    endfunction

    assign val = lookTable(e_snf[6:0]);
endmodule
/*
4'b0000 : lookTable = 32'h3f7c1764; //0.9847319278
4'b0001 : lookTable = 32'h3f74c867; //0.9561828875
4'b0010 : lookTable = 32'h3f6e133e; //0.92998111
4'b0011 : lookTable = 32'h3f67e3ed; //0.9058216273
4'b0100 : lookTable = 32'h3f6229ed; //0.8834522086
4'b0101 : lookTable = 32'h3f5cd76e; //0.8626621856
4'b0110 : lookTable = 32'h3f57e0cf; //0.8432740427
4'b0111 : lookTable = 32'h3f533c2e; //0.825136997
4'b1000 : lookTable = 32'h3f4ee116; //0.8081220356
4'b1001 : lookTable = 32'h3f4ac83f; //0.7921180344
4'b1010 : lookTable = 32'h3f46eb5a; //0.7770286899
4'b1011 : lookTable = 32'h3f4344e6; //0.7627700714
4'b1100 : lookTable = 32'h3f3fd012; //0.7492686493
4'b1101 : lookTable = 32'h3f3c889f; //0.7364596943
4'b1110 : lookTable = 32'h3f396ace; //0.7242859683
4'b1111 : lookTable = 32'h3f36734a; //0.7126966451
*/