-
Notifications
You must be signed in to change notification settings - Fork 3.4k
Expand file tree
/
Copy pathUrn.java
More file actions
439 lines (389 loc) · 15.5 KB
/
Urn.java
File metadata and controls
439 lines (389 loc) · 15.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
package com.linkedin.common.urn;
import com.linkedin.data.template.Custom;
import com.linkedin.util.ArgumentUtil;
import java.net.URISyntaxException;
import java.util.Collection;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import javax.annotation.Nullable;
/**
* Represents a URN (Uniform Resource Name) for a LinkedIn entity, in the spirit of RFC 2141.
* Our default URN format uses the non-standard namespace identifier "li", and hence default URNs
* begin with "urn:li:". Note that the namespace identifier according to
* <a href="https://www.ietf.org/rfc/rfc2141.txt">RFC 2141</a> [Section 2.1] is case-insensitive;
* for safety, parsing only accepts namespaces that start with a lower-case letter and contain
* only lower-case letters, digits and hyphens.
*
* <p>Our URNs all consist of an "entity type", which denotes an internal namespace for the resource,
* as well as an entity key, formatted as a tuple of parts. The full format of a URN is:
*
* <p>{@code <URN> ::= urn:<namespace>:<entityType>:<entityKey>}
*
* <p>The entity key is represented as a tuple of strings. If the tuple is of length 1, the
* key is encoded directly. If the tuple has multiple parts, the parts are enclosed in
* parentheses and comma-delimited, e.g., a URN whose key is the tuple [1, 2, 3] would be
* encoded as:
*
* <p>{@code urn:li:example:(1,2,3)}
*/
public class Urn {
/**
* Textual prefix of a Urn in the default "li" namespace.
*
* @deprecated Don't create the Urn string manually, use Typed Urns or
* {@link #createFromTuple(String, Object...)}
*/
@Deprecated
public static final String URN_PREFIX = "urn:li:";
// Literal scheme prefix that every raw Urn string must start with.
private static final String URN_START = "urn:";
// Namespace used by the constructors and factories that take no explicit namespace.
private static final String DEFAULT_NAMESPACE = "li";
// Entity type segment, e.g. "example" in urn:li:example:(1,2,3).
private final String _entityType;
// Key parts that follow the entity type; key-less Urns such as urn:li:foo get a TupleKey
// built with the no-arg constructor.
private final TupleKey _entityKey;
// Namespace segment between "urn:" and the entity type.
private final String _namespace;
// Used to speed up toString() in the common case where the Urn is built up
// from parsing an input string. Intentionally neither final nor volatile:
// toString() fills it in lazily when it is null.
@Nullable
private String _cachedStringUrn;
// Runs once when the class is initialized and registers a UrnCoercer for Urn with Custom.
// NOTE(review): presumably this is what lets the data-template layer convert values to and
// from Urn - confirm against Custom/UrnCoercer.
static {
Custom.registerCoercer(new UrnCoercer(), Urn.class);
}
/**
* Customized interner for all strings that may be used for _entityType.
* Urn._entityType is by nature a pretty small set of values, such as "member",
* "company" etc. Because of this, an app that creates and keeps in memory a
* large number of Urns may end up with a very large number of identical strings.
* Thus it's worth saving memory by interning _entityType when a Urn is instantiated.
* String.intern() would be a natural choice, but it takes a few microseconds, and
* thus may become too expensive when many (temporary) Urns are generated in very
* quick succession. Thus we use a faster ConcurrentHashMap below. Compared to the
* internal table used by String.intern() it has a bigger memory overhead per interned
* string, but for a small set of canonical strings that doesn't matter.
*
* <p>Each entry maps a string to itself (the canonical instance); entries are never removed.
*/
private static final Map<String, String> ENTITY_TYPE_INTERNER = new ConcurrentHashMap<>();
/**
 * Create a Urn given its raw String representation, i.e.
 * {@code urn:<namespace>:<entityType>} optionally followed by {@code :<entityKey>}.
 *
 * <p>Checks performed here: the string must start with "urn:", the namespace must pass
 * {@code validateAndExtractNamespace}, the entity type must start with [a-z] and contain only
 * [a-zA-Z_0-9] chars, and a ':' after the entity type must be followed by a non-empty key.
 * The entity type is interned in both the key-less and the keyed case.
 *
 * @param rawUrn - the String representation of a Urn.
 * @throws URISyntaxException - if the String is not a valid Urn.
 */
public Urn(String rawUrn) throws URISyntaxException {
  // NOTE(review): presumably rejects a null rawUrn before any parsing - confirm in ArgumentUtil.
  ArgumentUtil.notNull(rawUrn, "rawUrn");
  // Optimistically keep the input as the toString() value; revoked below in one special case.
  _cachedStringUrn = rawUrn;

  if (!rawUrn.startsWith(URN_START)) {
    throw new URISyntaxException(
        rawUrn,
        "Urn doesn't start with 'urn:'. Urn: " + rawUrn,
        0);
  }

  // The search starts one char past the namespace start, so a found colon always leaves
  // at least one namespace char in front of it.
  int secondColonIndex = rawUrn.indexOf(':', URN_START.length() + 1);
  _namespace = validateAndExtractNamespace(rawUrn, secondColonIndex);

  // First char of entityType must be [a-z]
  if (!charIsLowerCaseAlphabet(rawUrn, secondColonIndex + 1)) {
    throw new URISyntaxException(
        rawUrn,
        "First char of entityType must be [a-z]! Urn: " + rawUrn,
        secondColonIndex + 1);
  }

  // Skip the first entityType char, which was validated just above.
  int thirdColonIndex = rawUrn.indexOf(':', secondColonIndex + 2);

  // Case: urn:li:foo
  if (thirdColonIndex == -1) {
    String keylessEntityType = rawUrn.substring(secondColonIndex + 1);
    if (!charsAreWordClass(keylessEntityType)) {
      // The message names the same character set that charsAreWordClass accepts
      // (underscore included), matching the keyed case below.
      throw new URISyntaxException(
          rawUrn,
          "entityType must have only [a-zA-Z_0-9] chars. Urn: " + rawUrn);
    }
    // Intern here as well, so key-less Urns share the canonical entity type string.
    _entityType = internEntityType(keylessEntityType);
    _entityKey = new TupleKey();
    return;
  }

  String entityType = rawUrn.substring(secondColonIndex + 1, thirdColonIndex);
  if (!charsAreWordClass(entityType)) {
    throw new URISyntaxException(
        rawUrn,
        "entityType must have only [a-zA-Z_0-9] chars. Urn: " + rawUrn);
  }

  int numEntityKeyChars = rawUrn.length() - (thirdColonIndex + 1);
  if (numEntityKeyChars <= 0) {
    throw new URISyntaxException(
        rawUrn,
        "Urns with empty entityKey are not allowed. Urn: " + rawUrn);
  }

  _entityType = internEntityType(entityType);
  _entityKey = TupleKey.fromString(rawUrn, thirdColonIndex + 1);

  // For the sake of backwards compatibility, we must ensure that
  // new Urn("urn:li:y:(urn:li:z:1)").toString() == "urn:li:y:urn:li:z:1"
  // Thus, if we detect a TupleKey with 1 part AND we had a paren in the
  // input, we abort our optimization of storing the original URN.
  if (_entityKey.size() == 1 && rawUrn.charAt(thirdColonIndex + 1) == '(') {
    _cachedStringUrn = null;
  }
}
/**
* Create a Urn in the default "li" namespace from an entity type and an encoded String key.
* The key is converted to a TupleKey by parsing it with {@code TupleKey.fromString}.
*
* @param entityType - the entity type for the Urn; passed on as given, no validation is done here
* @param typeSpecificString - the encoded string representation of a TupleKey
* @throws URISyntaxException if the typeSpecificString is not a valid encoding of a TupleKey
*/
public Urn(String entityType, String typeSpecificString) throws URISyntaxException {
this(DEFAULT_NAMESPACE, entityType, TupleKey.fromString(typeSpecificString));
}
/**
* Create a Urn in the default "li" namespace from an entity type and an already-built key.
*
* @param entityType - the entity type for the Urn; passed on as given, no validation is done here
* @param entityKey - the key of the Urn
*/
public Urn(String entityType, TupleKey entityKey) {
this(DEFAULT_NAMESPACE, entityType, entityKey);
}
/**
 * Create a Urn from its three components. The arguments are stored exactly as given; unlike
 * the raw-string constructor, nothing is validated or interned here.
 *
 * @param namespace - the namespace of the Urn
 * @param entityType - the entity type for the Urn
 * @param entityKey - the key of the Urn
 */
public Urn(String namespace, String entityType, TupleKey entityKey) {
  // There is no source string to reuse, so toString() will build and cache one on first use.
  this._cachedStringUrn = null;
  this._entityKey = entityKey;
  this._entityType = entityType;
  this._namespace = namespace;
}
/**
* Create a Urn from an entity type and a sequence of key parts. The key parts are converted
* to a tuple using {@code TupleKey.create}.
*
* @param entityType - the entity type for the Urn
* @param tupleParts - a sequence of objects representing the key of the Urn
* @return - a new Urn object
* @deprecated use {@link #createFromTuple(String, Object...)}, which does the same thing
*/
@Deprecated
public static Urn create(String entityType, Object... tupleParts) {
return new Urn(entityType, TupleKey.create(tupleParts));
}
/**
* Create a Urn from an entity type and a collection of key parts. The key parts are converted
* to a tuple using {@code TupleKey.create}.
*
* @param entityType - the entity type for the Urn
* @param tupleParts - a collection of objects representing the key of the Urn
* @return - a new Urn object
* @deprecated use {@link #createFromTuple(String, java.util.Collection)}, which does the same thing
*/
@Deprecated
public static Urn create(String entityType, Collection<?> tupleParts) {
return new Urn(entityType, TupleKey.create(tupleParts));
}
/**
* Create a Urn in the default "li" namespace from an entity type and a sequence of key parts.
* The key parts are converted to a tuple using {@code TupleKey.create}.
*
* @param entityType - the entity type for the Urn
* @param tupleParts - a sequence of objects representing the key of the Urn
* @return - a new Urn object
*/
public static Urn createFromTuple(String entityType, Object... tupleParts) {
return new Urn(entityType, TupleKey.create(tupleParts));
}
/**
* Create a Urn from a namespace, an entity type and a sequence of key parts. The key parts are
* converted to a tuple using {@code TupleKey.create}. The namespace is passed to the constructor
* as given; it is not validated by this method.
*
* @param namespace - the namespace of this Urn
* @param entityType - the entity type for the Urn
* @param tupleParts - a sequence of objects representing the key of the Urn
* @return - a new Urn object
*/
public static Urn createFromTupleWithNamespace(String namespace, String entityType, Object... tupleParts) {
return new Urn(namespace, entityType, TupleKey.create(tupleParts));
}
/**
* Create a Urn in the default "li" namespace from an entity type and a collection of key parts.
* The key parts are converted to a tuple using {@code TupleKey.create}.
*
* @param entityType - the entity type for the Urn
* @param tupleParts - a collection of objects representing the key of the Urn
* @return - a new Urn object
*/
public static Urn createFromTuple(String entityType, Collection<?> tupleParts) {
return new Urn(entityType, TupleKey.create(tupleParts));
}
/**
* Create a Urn given its raw String representation. Static-factory equivalent of the
* {@code Urn(String)} constructor, to which it delegates all parsing and validation.
*
* @param rawUrn - the String representation of a Urn.
* @return - a new Urn object
* @throws URISyntaxException - if the String is not a valid Urn.
*/
public static Urn createFromString(String rawUrn) throws URISyntaxException {
return new Urn(rawUrn);
}
/**
 * Create a Urn from the raw CharSequence form of a Urn. The sequence is converted with
 * {@code toString()} and then parsed by the {@code Urn(String)} constructor.
 *
 * @param rawUrn - the CharSequence representation of a Urn.
 * @return - a new Urn object
 * @throws URISyntaxException - if the sequence is not a valid Urn.
 */
public static Urn createFromCharSequence(CharSequence rawUrn) throws URISyntaxException {
  // Checked here, before the toString() call below dereferences the argument.
  ArgumentUtil.notNull(rawUrn, "rawUrn");
  final String rawUrnText = rawUrn.toString();
  return new Urn(rawUrnText);
}
/**
* Create a Urn in the default "li" namespace from an entity type and an encoded String key.
* Static-factory equivalent of the {@code Urn(String, String)} constructor, which parses the key
* with {@code TupleKey.fromString}.
*
* @param entityType - the entity type for the Urn
* @param typeSpecificString - the encoded string representation of a TupleKey
* @return - a new Urn object
* @throws URISyntaxException if the typeSpecificString is not a valid encoding of a TupleKey
*/
public static Urn createFromTypeSpecificString(String entityType, String typeSpecificString)
throws URISyntaxException {
return new Urn(entityType, typeSpecificString);
}
/**
* @return the entity type segment of this Urn, e.g. "example" for urn:li:example:(1,2,3)
*/
public String getEntityType() {
return _entityType;
}
/**
* @return the namespace segment of this Urn, e.g. "li" for urn:li:example:(1,2,3)
*/
public String getNamespace() {
return _namespace;
}
/**
* @return the key of this Urn; the stored TupleKey instance itself is returned, not a copy
*/
public TupleKey getEntityKey() {
return _entityKey;
}
/**
* Convenience method to get the key's first tuple element (index 0) as a String.
* NOTE(review): behavior for a key-less Urn depends on {@code TupleKey.getAs} - confirm there.
*
* @return key's first tuple element
*/
public String getId() {
return _entityKey.getAs(0, String.class);
}
/**
* Convenience method to get the key's first tuple element (index 0) as an Integer.
* NOTE(review): behavior for a key-less Urn or a non-numeric element depends on
* {@code TupleKey.getAs} - confirm there.
*
* @return key's first tuple element, coerced to Integer
*/
public Integer getIdAsInt() {
return _entityKey.getAs(0, Integer.class);
}
/**
* Convenience method to get the key's first tuple element (index 0) as a Long.
* NOTE(review): behavior for a key-less Urn or a non-numeric element depends on
* {@code TupleKey.getAs} - confirm there.
*
* @return key's first tuple element, coerced to Long
*/
public Long getIdAsLong() {
return _entityKey.getAs(0, Long.class);
}
/**
* Convenience method to get the key's first tuple element (index 0) as a Urn, for Urns whose
* key embeds another Urn such as urn:li:y:(urn:li:z:1).
* NOTE(review): behavior when the element is not a valid Urn depends on
* {@code TupleKey.getAs} - confirm there.
*
* @return key's first tuple element, coerced to Urn
*/
public Urn getIdAsUrn() {
return _entityKey.getAs(0, Urn.class);
}
/**
 * Build the namespace-specific string of this URN: the part that comes after the
 * "urn:&lt;namespace&gt;:" prefix. It is the entity type, followed by ':' and the encoded
 * key when the key has at least one part.
 *
 * @return The namespace-specific string portion of this URN
 */
public String getNSS() {
  final String keySuffix;
  if (_entityKey.size() > 0) {
    keySuffix = ':' + _entityKey.toString();
  } else {
    // Key-less Urn: nothing follows the entity type.
    keySuffix = "";
  }
  return _entityType + keySuffix;
}
/**
 * Return the full string form of this Urn, "urn:" + namespace + ':' + {@link #getNSS()}.
 * The string is computed at most a few times and then served from a cache field.
 *
 * @return the string representation of this Urn; never null
 */
@Override
public String toString() {
  // Read the cache field exactly once into a local. The field is neither volatile nor
  // guarded by a lock, so every read of it races with writers; a second, separate read is
  // allowed by the Java memory model to observe null even after a first read saw a value.
  // Returning the local therefore guarantees a non-null result.
  String urnString = _cachedStringUrn;
  if (urnString == null) {
    // This can be written to by multiple threads, but that's actually safe
    // because Urn is immutable and all the threads will compute the same
    // logical String (even though they may produce different String objects).
    // So whichever thread "wins" the write race, the result is the same.
    // The field doesn't need to be volatile for memory visibility because it's
    // just a cache: a thread that still sees null simply recomputes the value.
    urnString = URN_START + _namespace + ':' + getNSS();
    _cachedStringUrn = urnString;
  }
  return urnString;
}
/**
 * Two Urns are equal when their entity type, entity key and namespace are all equal.
 * Any instance of Urn or of a subclass can compare equal; null and other types never do.
 *
 * @param obj - the object to compare with
 * @return true if obj is a Urn with the same entity type, key and namespace
 */
@Override
public boolean equals(Object obj) {
  if (!(obj instanceof Urn)) {
    // Covers null as well as unrelated types.
    return false;
  }
  final Urn that = (Urn) obj;
  if (!_entityType.equals(that._entityType)) {
    return false;
  }
  if (!_entityKey.equals(that._entityKey)) {
    return false;
  }
  return _namespace.equals(that._namespace);
}
/**
 * Hash of the entity type and entity key. The namespace is not mixed in; that is still
 * consistent with {@link #equals(Object)}, since equal Urns have equal type and key.
 *
 * @return 31 * hash(entityType) + hash(entityKey)
 */
@Override
public int hashCode() {
  final int typeHash = _entityType.hashCode();
  final int keyHash = _entityKey.hashCode();
  return 31 * typeHash + keyHash;
}
/**
 * Validate the namespace segment of a raw Urn and return it.
 *
 * <p>The namespace starts right after "urn:" and ends before the second colon. It must begin
 * with [a-z], be at most 32 chars long and consist only of [a-z0-9-] chars. The checks run in
 * that order, after the missing-colon check that follows the first-char check.
 *
 * @param rawUrn - the full raw Urn string, already known to start with "urn:"
 * @param secondColonIndex - index of the colon that ends the namespace, or -1 if there is none
 * @return the namespace; the shared "li" constant when the namespace is exactly "li"
 * @throws URISyntaxException if any of the checks above fails
 */
private static String validateAndExtractNamespace(String rawUrn,
    int secondColonIndex)
    throws URISyntaxException {
  final int namespaceStart = URN_START.length();

  if (!charIsLowerCaseAlphabet(rawUrn, namespaceStart)) {
    throw new URISyntaxException(
        rawUrn,
        "First char of Urn namespace must be [a-z]! Urn: " + rawUrn,
        namespaceStart);
  }

  if (secondColonIndex == -1) {
    throw new URISyntaxException(
        rawUrn,
        "Missing second ':' char. Urn: " + rawUrn);
  }

  final int namespaceLen = secondColonIndex - namespaceStart;
  if (namespaceLen > 32) {
    throw new URISyntaxException(
        rawUrn,
        "Namespace length > 32 chars. Urn: " + rawUrn,
        secondColonIndex);
  }

  // We want to avoid an allocation for the ultra-common "li" namespace!
  final boolean isDefaultNamespace =
      namespaceLen == 2 && rawUrn.startsWith("li", namespaceStart);
  if (isDefaultNamespace) {
    return DEFAULT_NAMESPACE;
  }

  final String namespace = rawUrn.substring(namespaceStart, secondColonIndex);
  if (charsAreValidNamespace(namespace)) {
    return namespace;
  }
  throw new URISyntaxException(
      rawUrn,
      "Chars in namespace must be [a-z0-9-]!. Urn: " + rawUrn);
}
// Tells whether the char at the given index is an ASCII lower-case letter [a-z].
// An index at or past the end of the input yields false instead of an exception.
// Character.isLowerCase is avoided on purpose: it is unicode-aware, while only
// ASCII matters here and the plain range check is faster.
private static boolean charIsLowerCaseAlphabet(String input, int index) {
  if (index < input.length()) {
    final char candidate = input.charAt(index);
    return 'a' <= candidate && candidate <= 'z';
  }
  return false;
}
// Tells whether every char of the input is a valid namespace char, i.e. one of [a-z0-9-].
// An empty input is trivially valid.
private static boolean charsAreValidNamespace(String input) {
  final int length = input.length();
  int pos = 0;
  while (pos < length) {
    final char ch = input.charAt(pos++);
    // Plain ASCII range checks instead of Character.isLowerCase etc: those are
    // unicode-aware, and only ASCII is needed here, which is also faster.
    final boolean lowerCaseLetter = ch >= 'a' && ch <= 'z';
    final boolean digit = ch >= '0' && ch <= '9';
    if (!lowerCaseLetter && !digit && ch != '-') {
      return false;
    }
  }
  return true;
}
// Tells whether every char of the input belongs to the regex word class (\w),
// which is defined as: [a-zA-Z_0-9]. An empty input is trivially valid.
// Source: https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html
private static boolean charsAreWordClass(String input) {
  for (int pos = 0, length = input.length(); pos < length; pos++) {
    final char ch = input.charAt(pos);
    // Plain ASCII range checks instead of Character.isLetterOrDigit etc: those are
    // unicode-aware, and only ASCII is needed here, which is also faster.
    if (ch >= 'a' && ch <= 'z') {
      continue;
    }
    if (ch >= 'A' && ch <= 'Z') {
      continue;
    }
    if (ch >= '0' && ch <= '9') {
      continue;
    }
    if (ch == '_') {
      continue;
    }
    return false;
  }
  return true;
}
/**
 * Return the canonical instance of an entity type string, registering the given string
 * as the canonical one if no equal string has been interned yet.
 *
 * @param et - the entity type string to intern
 * @return the canonical string equal to et
 */
private static String internEntityType(String et) {
  // Most of the times this method is called, the canonical string is already
  // in the table, so a quick get() comes first.
  String canonical = ENTITY_TYPE_INTERNER.get(et);
  if (canonical == null) {
    // putIfAbsent returns the previous mapping when another thread registered an equal
    // string in the meantime, and null when this call's string became the canonical one.
    final String winner = ENTITY_TYPE_INTERNER.putIfAbsent(et, et);
    canonical = (winner == null) ? et : winner;
  }
  return canonical;
}
}