#include <bits/stdc++.h>
using namespace std;
// #undef _GLIBCXX_DEBUG // disable run-time bound checking, etc
// #pragma GCC optimize("Ofast,inline") // Ofast = O3,fast-math,allow-store-data-races,no-protect-parens
// #pragma GCC optimize ("unroll-loops")
// #pragma GCC target("bmi,bmi2,lzcnt,popcnt") // bit manipulation
// #pragma GCC target("movbe") // byte swap
// #pragma GCC target("aes,pclmul,rdrnd") // encryption
// #pragma GCC target("avx,avx2,f16c,fma,sse3,ssse3,sse4.1,sse4.2") // SIMD
// #include <bits/extc++.h>
// using namespace __gnu_pbds;
// template<class T>using ordered_set = tree<T, null_type, less<T>, rb_tree_tag,tree_order_statistics_node_update>;
// template<class T>using ordered_multiset = tree<T, null_type, less_equal<T>, rb_tree_tag,tree_order_statistics_node_update>;
// Contest shorthand macros.
#define ll long long
#define INF ((ll)(1e9+7))
// Counts i from 0 up to (excluding) n. The bound expression is re-evaluated on
// every iteration, and the (ll) cast binds only to its first operand, so
// fo(i, a*b) multiplies in 64 bits.
#define fo(i, n) for(ll i=0;i<((ll)n);i++)
// Debug printers. deb carries its own trailing semicolon; deb2 does not.
#define deb(x) cout << #x << " = " << (x) << endl;
#define deb2(x, y) cout << #x << " = " << (x) << ", " << #y << " = " << (y) << endl
#define pb push_back
#define mp make_pair
#define F first
#define S second
// Isolates the lowest set bit of x. The argument is parenthesised inside the
// negation so compound expressions such as LSOne(a | b) expand correctly.
#define LSOne(x) ((x) & (-(x)))
// Iterator pairs for a whole container; the argument is parenthesised so that
// expressions such as all(*ptr) expand correctly.
#define all(x) (x).begin(), (x).end()
#define rall(x) (x).rbegin(), (x).rend()
// Reads one unsigned decimal token from stdin.
//
// Consumes characters up to and including the first space, newline, or end of
// input, folding each consumed character into the result as a decimal digit.
// An empty token (delimiter or EOF straight away) yields 0. Characters are not
// validated as digits and overflow is not checked.
inline int readint() {
    int v = 0;
    // getchar() returns int so that EOF is distinct from every byte value.
    // Holding it in a char loses that: EOF never matches where char is
    // unsigned, and byte 0xFF is mistaken for EOF where char is signed.
    int c;
    while ((c = getchar()) != EOF && c != ' ' && c != '\n') {
        v *= 10;
        v += c - '0';
    }
    return v;
}
// Reads one optionally '-'-prefixed decimal token from stdin.
//
// Consumes characters up to and including the first space, newline, or end of
// input. An empty token (delimiter or EOF as the very first character) yields
// 0, matching readint(). Characters after the optional sign are not validated
// as digits and overflow is not checked.
inline int readintsigned() {
    // int, not char: getchar() returns int so EOF stays distinguishable from
    // every byte value.
    int c = getchar();
    if (c == EOF || c == ' ' || c == '\n') {
        // Nothing to parse; do not fold the delimiter in as if it were a digit.
        return 0;
    }

    int sign = 1;
    int v = 0;
    if (c == '-') {
        sign = -1;
    } else {
        v = c - '0';
    }

    while ((c = getchar()) != EOF && c != ' ' && c != '\n') {
        v *= 10;
        v += c - '0';
    }
    return v * sign;
}
// Reads one whitespace-delimited token from stdin.
//
// Consumes characters up to and including the first space, newline, or end of
// input and returns the characters before that delimiter. An empty token
// yields an empty string.
inline std::string readstring() {
    std::string s;
    // int, not char: getchar() returns int so EOF stays distinguishable from
    // every byte value (a char would confuse byte 0xFF with EOF, or never see
    // EOF at all where char is unsigned).
    int c;
    while ((c = getchar()) != EOF && c != '\n' && c != ' ') {
        s.push_back(static_cast<char>(c));
    }
    return s;
}
// Time elapsed from `start` to the current reading of `clock_t`, converted to
// `result_t` (milliseconds unless specified otherwise).
template <class result_t = std::chrono::milliseconds,
          class clock_t = std::chrono::steady_clock,
          class duration_t = std::chrono::milliseconds>
auto since(std::chrono::time_point<clock_t, duration_t> const& start)
{
    const auto elapsed = clock_t::now() - start;
    return std::chrono::duration_cast<result_t>(elapsed);
}
typedef pair<int, int> pii;
typedef pair<ll, ll> pl;
typedef vector<int> vi;
typedef vector<ll> vl;
typedef vector<pii> vpii;
typedef vector<pl> vpl;
typedef vector<vi> vvi;
typedef vector<vl> vvl;
typedef vector<vpii> vvpii;
typedef vector<vpl> vvpl;
// Candidate arrangement for the current test case: main() fills, shuffles and
// perturbs it; check() reads it.
vl v;
// vvl memo;
// Number of elements in the current test case (read by main()).
ll n;
// Highest index at which check() has hit a collision so far; main() resets it
// to -1 at the start of each test case.
ll best;
// Copy of v saved by check() whenever it raises `best` (and by main() right
// after the initial shuffle); main() restarts each perturbation from it.
vl current;
bool check(){
vl res(n, 0);
fo(i, n){
if(res[(i+v[i])%n]){
if(i>best){
current = v;
best = i;
}
return 0;
}
res[(i+v[i])%n] = 1;
}
return 1;
}
int main(){
cin.tie(0)->sync_with_stdio(0);
ll t, tt;
cin >> t;
tt = t;
ll startTime = chrono::steady_clock::now().time_since_epoch().count();
srand(50);
while(t--){
cin >> n;
v.assign(n, 0);
// current.assign(n, 0);
ll currentSum = 0;
fo(i, n){
cin >> v[i];
currentSum+=v[i];
}
bool fail = 1;
best = -1;
// deb(currentSum)
if(currentSum%n != 0){
cout << "NO\n";
continue;
}
srand(1337);
shuffle(all(v), rand);
current = v;
while(true){
if(check()){
break;
}
v = current;
fo(ii, rand()%n) swap(v[best], v[rand()%n]);
}
cout << "YES\n";
fo(i, n) cout << v[i] << " ";
cout << "\n";
}
return 0;
}